From 328cbbffafd1469bb20d5c4dcf5cbe93ccac168d Mon Sep 17 00:00:00 2001 From: Jacob Vogel Date: Thu, 22 Aug 2019 14:26:37 -0400 Subject: [PATCH] removed excess files --- Genes_from_other_studies.ipynb | 1976 -------------------------------- 1 file changed, 1976 deletions(-) delete mode 100644 Genes_from_other_studies.ipynb diff --git a/Genes_from_other_studies.ipynb b/Genes_from_other_studies.ipynb deleted file mode 100644 index 1a2f79c..0000000 --- a/Genes_from_other_studies.ipynb +++ /dev/null @@ -1,1976 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "import os\n", - "from glob import glob\n", - "import numpy as np\n", - "import pandas\n", - "from difflib import get_close_matches" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "probes = pandas.read_csv('/home/users/jvogel/Science/Allen_Human_Brain_Atlas/normalized_microarray_donor10021/Probes.csv')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Get other studies" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Oreilly_DOreilly_D_entrezOreilly_VOreilly_V_entrezCembrowski2017_VCembrowski2017_DThompsonChristensen_vChristensen_dDong
0Cyp1b125426.0Caln1363909.0Cntnap5cCdh7Sema5bSstr1AsamCol5a1
1Cyp26b1312495.0Crtac1171438.0Ntng2EmbSema5aRab3bLoc498835Dcn
2Ntf381737.0Dcn29139.0Pcdh20Fn1Robo1Odz4Nt5Matn2
3NTS299757.0Gda83585.0Robo1NtsEphb1Stk23Plscr1Fbln2
4Olfml2b304960.0Cadm1363058.0Sema3aRxfp1Epha7Fxyd6PplLct
\n", - "
" - ], - "text/plain": [ - " Oreilly_D Oreilly_D_entrez Oreilly_V Oreilly_V_entrez Cembrowski2017_V \\\n", - "0 Cyp1b1 25426.0 Caln1 363909.0 Cntnap5c \n", - "1 Cyp26b1 312495.0 Crtac1 171438.0 Ntng2 \n", - "2 Ntf3 81737.0 Dcn 29139.0 Pcdh20 \n", - "3 NTS 299757.0 Gda 83585.0 Robo1 \n", - "4 Olfml2b 304960.0 Cadm1 363058.0 Sema3a \n", - "\n", - " Cembrowski2017_D Thompson Christensen_v Christensen_d Dong \n", - "0 Cdh7 Sema5b Sstr1 Asam Col5a1 \n", - "1 Emb Sema5a Rab3b Loc498835 Dcn \n", - "2 Fn1 Robo1 Odz4 Nt5 Matn2 \n", - "3 Nts Ephb1 Stk23 Plscr1 Fbln2 \n", - "4 Rxfp1 Epha7 Fxyd6 Ppl Lct " - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "studies = pandas.ExcelFile('/Users/jakevogel/Dropbox/Work/Projects/AHBA_Hippocampus_AP/Other_papers.xlsx'\n", - " ).parse('Sheet1')\n", - "studies.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Prepare Leonardos" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "leo = pandas.ExcelFile('/Users/jakevogel/Dropbox/Work/Projects/AHBA_Hippocampus_AP/Leonardo_2006_Table.xls'\n", - " ).parse('Sheet1')" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "leo_d = leo[leo['V/D Fold change']<-2.529]['Name'].values\n", - "leo_v = leo[leo['V/D Fold change']>3.13]['Name'].values" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "metadata": {}, - "outputs": [], - "source": [ - "for i,gene in enumerate(leo_d):\n", - " matches = get_close_matches(gene, probes.gene_name.unique())\n", - " #print(gene,'\\n',matches,'\\n\\n')\n", - " if len(matches)>0:\n", - " hit = probes[probes['gene_name']==matches[0]\n", - " ]['gene_symbol'].values[0]\n", - " studies.loc[i,'Leonardo_2006_dorsal'] = hit\n" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": { - "collapsed": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "RIKEN cDNA 2810454P21 gene \n", - " [] \n", - "\n", - "\n", - "regulated endocrine-specific protein 18 \n", - " ['regulated endocrine-specific protein 18 homolog (rat)', 'lymphocyte-specific protein 1', 'NOTCH-regulated ankyrin repeat protein'] \n", - "\n", - "\n", - "Keep it? y\n", - "coatomer protein complex, subunit gamma 2, antisense 2 \n", - " ['coatomer protein complex, subunit gamma 2', 'coatomer protein complex, subunit gamma 1', 'coatomer protein complex, subunit zeta 2'] \n", - "\n", - "\n", - "Keep it? y\n", - "tissue inhibitor of metalloproteinase 2 \n", - " ['X-linked inhibitor of apoptosis'] \n", - "\n", - "\n", - "Keep it? n\n", - "coatomer protein complex, subunit gamma 2, antisense 2 \n", - " ['coatomer protein complex, subunit gamma 2', 'coatomer protein complex, subunit gamma 1', 'coatomer protein complex, subunit zeta 2'] \n", - "\n", - "\n", - "Keep it? y\n", - "RIKEN cDNA 2210017A09 gene \n", - " [] \n", - "\n", - "\n", - "slit homolog 2 (Drosophila) \n", - " ['slit homolog 2 (Drosophila)', 'slit homolog 3 (Drosophila)', 'slit homolog 1 (Drosophila)'] \n", - "\n", - "\n", - "Keep it? y\n", - "expressed sequence AI527257 \n", - " [] \n", - "\n", - "\n", - "tissue inhibitor of metalloproteinase 2 \n", - " ['X-linked inhibitor of apoptosis'] \n", - "\n", - "\n", - "Keep it? n\n", - "tissue inhibitor of metalloproteinase 2 \n", - " ['X-linked inhibitor of apoptosis'] \n", - "\n", - "\n", - "Keep it? n\n", - "ESTs \n", - " [] \n", - "\n", - "\n", - "brain and kidney protein \n", - " ['renin binding protein', 'dynamin binding protein', 'acrosin binding protein'] \n", - "\n", - "\n", - "Keep it? n\n", - "netrin G1 \n", - " ['netrin G1', 'netrin 1', 'netrin G2'] \n", - "\n", - "\n", - "Keep it? y\n", - "suppressor of cytokine signaling 2 \n", - " ['suppressor of cytokine signaling 2', 'suppressor of cytokine signaling 7', 'suppressor of cytokine signaling 6'] \n", - "\n", - "\n", - "Keep it? y\n", - "neuronatin \n", - " ['neuronatin', 'neuroplastin', 'neurocan'] \n", - "\n", - "\n", - "Keep it? y\n", - "hypothetical protein LOC225642 \n", - " ['hypothetical protein LOC729569', 'hypothetical protein LOC642031', 'hypothetical protein LOC286434'] \n", - "\n", - "\n", - "Keep it? n\n", - "decorin \n", - " ['decorin', 'recoverin', 'doublecortin'] \n", - "\n", - "\n", - "Keep it? y\n" - ] - } - ], - "source": [ - "for i,gene in enumerate(leo_v):\n", - " matches = get_close_matches(gene, probes.gene_name.unique())\n", - " query = True\n", - " print(gene,'\\n',matches,'\\n\\n')\n", - " if len(matches)>0:\n", - " while query:\n", - " ans = input('Keep it? ')\n", - " if ans == 'y':\n", - " hit = probes[probes['gene_name']==matches[0]\n", - " ]['gene_symbol'].values[0]\n", - " studies.loc[i,'Leonardo_2006_ventral'] = hit\n", - " query = False\n", - " \n" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "allvals = sorted(abs(leo['V/D Fold change'].values)[1:])\n", - "onep = len(allvals) - int(len(allvals)*0.01)\n", - "vmax = allvals[onep]\n", - "leo_thr = leo[abs(leo['V/D Fold change']) > vmax]\n", - "leo_d_1p = []\n", - "leo_v_1p = []\n", - "for i,row in leo_thr.iterrows():\n", - " if row['V/D Fold change'] > 0:\n", - " leo_v_1p.append(leo_thr.loc[i,'Name'])\n", - " else:\n", - " leo_d_1p.append(leo_thr.loc[i,'Name'])" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "studies = studies.reindex(index=range(300))" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "metadata": { - "collapsed": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "RIKEN cDNA B930011D01 gene \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA B430320C24 gene \n", - " [] \n", - "\n", - "\n", - "differential display and activated by p53 \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA 2410022L05 gene \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA 1810037C22 gene \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA 4930434H03 gene \n", - " [] \n", - "\n", - "\n", - "expressed sequence AI315208 \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA 1300010A20 gene \n", - " [] \n", - "\n", - "\n", - "Mus musculus, clone IMAGE:4161424, mRNA \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA 0710001E13 gene \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA 1300002C13 gene \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA 4933440J18 gene \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA A330103B05 gene \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA 1300002C13 gene \n", - " [] \n", - "\n", - "\n", - "ESTs, Highly similar to JN0609 nitric-oxide synthase (EC 1.14.13.39), neuronal - mouse [M.musculus] \n", - " [] \n", - "\n", - "\n", - "expressed sequence AA536730 \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA 2310047E01 gene \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA 9030408N13 gene \n", - " [] \n", - "\n", - "\n", - "ESTs \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA A330103B05 gene \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA 2900075G08 gene \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA A330103B05 gene \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA 2410012A13 gene \n", - " [] \n", - "\n", - "\n", - "5-hydroxytryptamine (serotonin) receptor 2C \n", - " ['5-hydroxytryptamine (serotonin) receptor 3C, ionotropic', '5-hydroxytryptamine (serotonin) receptor 7 pseudogene 1', '5-hydroxytryptamine (serotonin) receptor 3E, ionotropic'] \n", - "\n", - "\n", - "Keep it? 5-hydroxytryptamine (serotonin) receptor 2C, G protein-coupled\n", - "RIKEN cDNA 2810454P21 gene \n", - " [] \n", - "\n", - "\n", - "regulated endocrine-specific protein 18 \n", - " ['regulated endocrine-specific protein 18 homolog (rat)', 'lymphocyte-specific protein 1', 'NOTCH-regulated ankyrin repeat protein'] \n", - "\n", - "\n", - "Keep it? y\n", - "coatomer protein complex, subunit gamma 2, antisense 2 \n", - " ['coatomer protein complex, subunit gamma 2', 'coatomer protein complex, subunit gamma 1', 'coatomer protein complex, subunit zeta 2'] \n", - "\n", - "\n", - "Keep it? y\n", - "coatomer protein complex, subunit gamma 2, antisense 2 \n", - " ['coatomer protein complex, subunit gamma 2', 'coatomer protein complex, subunit gamma 1', 'coatomer protein complex, subunit zeta 2'] \n", - "\n", - "\n", - "Keep it? y\n", - "RIKEN cDNA 2210017A09 gene \n", - " [] \n", - "\n", - "\n", - "slit homolog 2 (Drosophila) \n", - " ['slit homolog 2 (Drosophila)', 'slit homolog 3 (Drosophila)', 'slit homolog 1 (Drosophila)'] \n", - "\n", - "\n", - "Keep it? y\n", - "expressed sequence AI527257 \n", - " [] \n", - "\n", - "\n", - "ESTs \n", - " [] \n", - "\n", - "\n", - "brain and kidney protein \n", - " ['renin binding protein', 'dynamin binding protein', 'acrosin binding protein'] \n", - "\n", - "\n", - "Keep it? n\n", - "netrin G1 \n", - " ['netrin G1', 'netrin 1', 'netrin G2'] \n", - "\n", - "\n", - "Keep it? y\n", - "suppressor of cytokine signaling 2 \n", - " ['suppressor of cytokine signaling 2', 'suppressor of cytokine signaling 7', 'suppressor of cytokine signaling 6'] \n", - "\n", - "\n", - "Keep it? y\n", - "neuronatin \n", - " ['neuronatin', 'neuroplastin', 'neurocan'] \n", - "\n", - "\n", - "Keep it? y\n", - "hypothetical protein LOC225642 \n", - " ['hypothetical protein LOC729569', 'hypothetical protein LOC642031', 'hypothetical protein LOC286434'] \n", - "\n", - "\n", - "Keep it? n\n", - "decorin \n", - " ['decorin', 'recoverin', 'doublecortin'] \n", - "\n", - "\n", - "Keep it? y\n" - ] - } - ], - "source": [ - "#busts = []\n", - "for i,gene in enumerate(leo_v_1p):\n", - " if gene in busts:\n", - " continue\n", - " if pandas.notnull(studies.loc[i,'Leonardo_2006_ventral_top1p']):\n", - " continue\n", - " if not pandas.notnull(gene):\n", - " continue\n", - " matches = get_close_matches(gene, probes.gene_name.unique())\n", - " query = True\n", - " print(gene,'\\n',matches,'\\n\\n')\n", - " if len(matches)>0:\n", - " while query:\n", - " ans = input('Keep it? ')\n", - " if ans == 'y':\n", - " hit = probes[probes['gene_name']==matches[0]\n", - " ]['gene_symbol'].values[0]\n", - " studies.loc[i,'Leonardo_2006_ventral_top1p'] = hit\n", - " elif ans.isdigit():\n", - " hit = probes[probes['gene_name']==matches[int(ans)]\n", - " ]['gene_symbol'].values[0]\n", - " studies.loc[i,'Leonardo_2006_ventral_top1p'] = hit\n", - " elif len(ans) > 2:\n", - " hit = probes[probes['gene_name']==ans\n", - " ]['gene_symbol'].values[0]\n", - " studies.loc[i,'Leonardo_2006_ventral_top1p'] = hit\n", - " else:\n", - " busts.append(gene)\n", - " query = False\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 114, - "metadata": { - "collapsed": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ESTs, Weakly similar to klotho [Mus musculus] [M.musculus] \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA 4930526B11 gene \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA 1190002N15 gene \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA 2810011M08 gene \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA 4930526B11 gene \n", - " [] \n", - "\n", - "\n", - "Mus musculus, Similar to RAS protein-specific guanine nucleotide-releasing factor 1, clone MGC:36047 IMAGE:5367388, mRNA, complete cds \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA 1190002N15 gene \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA 1200009K10 gene \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA 1200017E04 gene \n", - " [] \n", - "\n", - "\n", - "brain and heart sodium channel beta 3 subunit \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA 4833414B02 gene \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA 2310043N10 gene \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA 4933402K10 gene \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA 6330415F13 gene \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA 3110003A17 gene \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA 6330415F13 gene \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA 2700050C12 gene \n", - " [] \n", - "\n", - "\n", - "RIKEN cDNA 6330527O06 gene \n", - " [] \n", - "\n", - "\n", - "solute carrier family 17 (vesicular glutamate transporter), member 6 \n", - " ['solute carrier family 1 (glutamate transporter), member 7', 'solute carrier family 32 (GABA vesicular transporter), member 1', 'solute carrier family 26 (sulfate transporter), member 2'] \n", - "\n", - "\n", - "Keep it? solute carrier family 17 (sodium-dependent inorganic phosphate cotransporter), member 6\n", - "monoglyceride lipase \n", - " ['monoglyceride lipase', 'glycerate kinase', 'glycerol kinase'] \n", - "\n", - "\n", - "Keep it? y\n", - "protein phosphatase 1, regulatory (inhibitor) subunit 14A \n", - " ['protein phosphatase 1, regulatory (inhibitor) subunit 14A', 'protein phosphatase 1, regulatory (inhibitor) subunit 1A', 'protein phosphatase 1, regulatory (inhibitor) subunit 14D'] \n", - "\n", - "\n", - "Keep it? y\n" - ] - } - ], - "source": [ - "busts = []\n", - "for i,gene in enumerate(leo_d_1p):\n", - " if gene in busts:\n", - " continue\n", - " if pandas.notnull(studies.loc[i,'Leonardo_2006_dorsal_top1p']):\n", - " continue\n", - " if not pandas.notnull(gene):\n", - " continue\n", - " matches = get_close_matches(gene, probes.gene_name.unique())\n", - " query = True\n", - " print(gene,'\\n',matches,'\\n\\n')\n", - " if len(matches)>0:\n", - " while query:\n", - " ans = input('Keep it? ')\n", - " if ans == 'y':\n", - " hit = probes[probes['gene_name']==matches[0]\n", - " ]['gene_symbol'].values[0]\n", - " studies.loc[i,'Leonardo_2006_dorsal_top1p'] = hit\n", - " elif ans.isdigit():\n", - " hit = probes[probes['gene_name']==matches[int(ans)]\n", - " ]['gene_symbol'].values[0]\n", - " studies.loc[i,'Leonardo_2006_dorsal_top1p'] = hit\n", - " elif len(ans) > 2:\n", - " hit = probes[probes['gene_name']==ans\n", - " ]['gene_symbol'].values[0]\n", - " studies.loc[i,'Leonardo_2006_dorsal_top1p'] = hit\n", - " else:\n", - " busts.append(gene)\n", - " query = False\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 116, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "studies.to_csv('/Users/jakevogel/Science/AHBA/Other_Studies_Genes.csv')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Prepare Lees" - ] - }, - { - "cell_type": "code", - "execution_count": 124, - "metadata": {}, - "outputs": [], - "source": [ - "lee_allv = pandas.ExcelFile('/Users/jakevogel/Dropbox/Work/Projects/AHBA_Hippocampus_AP/Lee_2017_TableS2.xlsx'\n", - " ).parse('Ventrally-enriched genes')\n", - "lee_allv.columns = lee_allv.loc[1,:]\n", - "lee_allv.drop([0,1],inplace=True)\n", - "lee_alld = pandas.ExcelFile('/Users/jakevogel/Dropbox/Work/Projects/AHBA_Hippocampus_AP/Lee_2017_TableS2.xlsx'\n", - " ).parse('Dorsally-enriched genes') \n", - "lee_alld.columns = lee_alld.loc[1,:]\n", - "lee_alld.drop([0,1],inplace=True)\n", - "\n", - "leev = lee_allv.gene_symbol.values\n", - "studies.loc[:,'lee2017_ventral'] = leev.tolist() + ([np.nan]*(len(studies)-len(leev)))\n", - "leed = lee_alld.gene_symbol.values\n", - "studies.loc[:,'lee2017_dorsal'] = leed.tolist() + ([np.nan]*(len(studies)-len(leed)))" - ] - }, - { - "cell_type": "code", - "execution_count": 127, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "studies = studies.reindex(index=range(1000))" - ] - }, - { - "cell_type": "code", - "execution_count": 128, - "metadata": {}, - "outputs": [], - "source": [ - "leeall = pandas.ExcelFile('/Users/jakevogel/Dropbox/Work/Projects/AHBA_Hippocampus_AP/Lee_2017_TableS1.xlsx')\n", - "for sheet in leeall.sheet_names:\n", - " df = leeall.parse(sheet)\n", - " df.columns = df.loc[1,:]\n", - " df.drop([0,1],inplace=True)\n", - " leev = df[df['%s_log2FC'%sheet]>0].gene_symbol.values\n", - " studies.loc[:,'lee2017_ventral_%s'%sheet] = leev.tolist() + ([np.nan]*(len(studies)-len(leev)))\n", - " leed = df[df['%s_log2FC'%sheet]<0].gene_symbol.values\n", - " studies.loc[:,'lee2017_dorsal_%s'%sheet] = leev.tolist() + ([np.nan]*(len(studies)-len(leev)))" - ] - }, - { - "cell_type": "code", - "execution_count": 130, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "studies.to_csv('/Users/jakevogel/Science/AHBA/Other_Studies_Genes.csv')" - ] - }, - { - "cell_type": "code", - "execution_count": 131, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "studies.to_csv('/Users/jakevogel/Dropbox/Work/Projects/AHBA_Hippocampus_AP/MANUSCRIPT/Natures/NatureComms/Revision/Other_Studies_Genes.csv')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Cembrowskis" - ] - }, - { - "cell_type": "code", - "execution_count": 137, - "metadata": {}, - "outputs": [], - "source": [ - "cem = pandas.ExcelFile('/Users/jakevogel/Dropbox/Work/Projects/AHBA_Hippocampus_AP/Cembrowski_2013_Table.xls'\n", - " ).parse('tableS2R1.txt')\n", - "cemv = cem[cem.enrichedRegion=='ventral']['geneShortName'].values\n", - "studies.loc[:,'Cembrowski2013_ventral'] = cemv.tolist() + ([np.nan]*(len(studies)-len(cemv)))\n", - "cemd = cem[cem.enrichedRegion=='dorsal']['geneShortName'].values\n", - "studies.loc[:,'Cembrowski2013_dorsal'] = cemd.tolist() + ([np.nan]*(len(studies)-len(cemd)))" - ] - }, - { - "cell_type": "code", - "execution_count": 138, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "studies.to_csv('/Users/jakevogel/Dropbox/Work/Projects/AHBA_Hippocampus_AP/MANUSCRIPT/Natures/NatureComms/Revision/Other_Studies_Genes.csv')" - ] - }, - { - "cell_type": "code", - "execution_count": 141, - "metadata": {}, - "outputs": [], - "source": [ - "studies.drop([x for x in studies.columns if 'entrez' in x],axis=1,inplace=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Compare to my study" - ] - }, - { - "cell_type": "code", - "execution_count": 149, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'GAL'" - ] - }, - "execution_count": 149, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gene" - ] - }, - { - "cell_type": "code", - "execution_count": 154, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['GAL', 'A_24_P102119', 'ONECUT2', 'PDLIM5', 'COL5A2', 'KDELR3',\n", - " 'GREM2', 'HPSE2', 'DDC', 'CTXN3', 'FAM43B', 'A_24_P62668', 'PVALB',\n", - " 'CASR', 'WNT10A', 'TTR', 'FSTL4', 'TNNT2', 'A_32_P121537',\n", - " 'A_24_P401842', 'DGKI', 'C1orf133', 'RP11-291L15.2', 'NTN1',\n", - " 'OSBPL3', 'TPBG', 'BNC2', 'BDKRB1', 'RGMA', 'GRHL2',\n", - " 'RP11-561O23.6', 'HHIP', 'SERTAD4', 'A_32_P11262', 'NPNT',\n", - " 'SERPINF1', 'KLK7', 'RSPH9', 'LYPD1', 'GABRQ', 'PYDC1', 'SYTL2',\n", - " 'RP13-102H20.1', 'SSTR1', 'LXN', 'TMEM215', 'C1QL1', 'PIRT',\n", - " 'KCNG1', 'AQP3', 'A_23_P213527', 'RP11-45B20.3', 'C1orf187',\n", - " 'GPR39', 'OPRK1', 'CD36', 'MYB', 'GPR26', 'LMO1', 'GPR88', 'VGLL3',\n", - " 'NR2F2', 'GPR83', 'GDA', 'NRG1', 'RSPO2', 'A_32_P136776',\n", - " 'C20orf103', 'SYTL1', 'LGALS2', 'KIAA1772', 'A_32_P115840',\n", - " 'SEMA3D', 'GEFT'], dtype=object)" - ] - }, - "execution_count": 154, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "set1" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['SERPINF1', 'KLK7', 'RSPH9', 'LYPD1', 'GABRQ', 'PYDC1', 'SYTL2',\n", - " 'RP13-102H20.1', 'SSTR1', 'LXN', 'TMEM215', 'C1QL1', 'PIRT',\n", - " 'KCNG1', 'AQP3', 'A_23_P213527', 'RP11-45B20.3', 'C1orf187',\n", - " 'GPR39', 'OPRK1', 'CD36', 'MYB', 'GPR26', 'LMO1', 'GPR88', 'VGLL3',\n", - " 'NR2F2', 'GPR83', 'GDA', 'NRG1', 'RSPO2', 'A_32_P136776',\n", - " 'C20orf103', 'SYTL1', 'LGALS2', 'KIAA1772', 'A_32_P115840', 'SEMA3D'], dtype=object)" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "AQP3 & & BDKRB1 & \\\\\n", - "C1QL1 & & BNC2 & \\\\\n", - "C1orf187 & & C1orf133 & \\\\\n", - "C20orf103 & & CASR & \\\\\n", - "CD36 & & COL5A2 & \\\\\n", - "GABRQ & & CTXN3 & \\\\\n", - "GDA & & DDC & \\\\\n", - "GPR26 & & DGKI & \\\\\n", - "GPR39 & & FAM43B & \\\\\n", - "GPR83 & & FSTL4 & \\\\\n", - "GPR88 & & GAL & \\\\\n", - "KCNG1 & & GEFT & \\\\\n", - "KIAA1772 & & GREM2 & \\\\\n", - "KLK7 & & GRHL2 & \\\\\n", - "LGALS2 & & HHIP & \\\\\n", - "LMO1 & & HPSE2 & \\\\\n", - "LXN & & KDELR3 & \\\\\n", - "LYPD1 & & NPNT & \\\\\n", - "MYB & & NTN1 & \\\\\n", - "NR2F2 & & ONECUT2 & \\\\\n", - "NRG1 & & OSBPL3 & \\\\\n", - "OPRK1 & & PDLIM5 & \\\\\n", - "PIRT & & PVALB & \\\\\n", - "PYDC1 & & RGMA & \\\\\n", - "RSPH9 & & SERTAD4 & \\\\\n", - "RSPO2 & & TNNT2 & \\\\\n", - "SEMA3D & & TPBG & \\\\\n", - "SERPINF1 & & TTR & \\\\\n", - "SSTR1 & & WNT10A & \\\\\n", - "SYTL1 & & & \\\\\n", - "SYTL2 & & & \\\\\n", - "TMEM215 & & & \\\\\n", - "VGLL3 & & & \\\\\n" - ] - } - ], - "source": [ - "me = pandas.read_csv('/home/users/jvogel/Science/Allen_Human_Brain_Atlas/MAIN_model_genes_of_importance.csv',\n", - " index_col=0)\n", - "set1a = me[(me.step==1) & (me['ant-post']=='anterior')].gene_symbol.unique()\n", - "set1p = me[(me.step==1) & (me['ant-post']=='posterior')].gene_symbol.unique()\n", - "\n", - "set1a = sorted([x for x in set1a if '_' not in x and '-' not in x])\n", - "set1p = sorted([x for x in set1p if '_' not in x and '-' not in x])\n", - "\n", - "\n", - "for i in range(len(set1a)):\n", - " if i < len(set1p):\n", - " print('%s & & %s & \\\\\\\\'%(set1a[i],set1p[i]))\n", - " else:\n", - " print('%s & & & \\\\\\\\'%(set1a[i])) " - ] - }, - { - "cell_type": "code", - "execution_count": 150, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "*** GAL ***\n", - "\n", - "\n", - "\n", - "*** A_24_P102119 ***\n", - "\n", - "\n", - "\n", - "*** ONECUT2 ***\n", - "\n", - "\n", - "\n", - "*** PDLIM5 ***\n", - "\n", - "\n", - "\n", - "*** COL5A2 ***\n", - "lee2017_ventral_P14_VH_vs_DH\n", - "lee2017_dorsal_P14_VH_vs_DH\n", - "lee2017_ventral_P45_VH_vs_DH\n", - "lee2017_dorsal_P45_VH_vs_DH\n", - "\n", - "\n", - "\n", - "*** KDELR3 ***\n", - "\n", - "\n", - "\n", - "*** GREM2 ***\n", - "\n", - "\n", - "\n", - "*** HPSE2 ***\n", - "\n", - "\n", - "\n", - "*** DDC ***\n", - "\n", - "\n", - "\n", - "*** CTXN3 ***\n", - "\n", - "\n", - "\n", - "*** FAM43B ***\n", - "\n", - "\n", - "\n", - "*** A_24_P62668 ***\n", - "\n", - "\n", - "\n", - "*** PVALB ***\n", - "\n", - "\n", - "\n", - "*** CASR ***\n", - "\n", - "\n", - "\n", - "*** WNT10A ***\n", - "\n", - "\n", - "\n", - "*** TTR ***\n", - "Leonardo_2006\n", - "Leonardo_2006_dorsal\n", - "Leonardo_2006_ventral\n", - "Leonardo_2006_dorsal_top1p\n", - "lee2017_ventral_P28_VH_vs_DH\n", - "lee2017_dorsal_P28_VH_vs_DH\n", - "lee2017_ventral_P45_VH_vs_DH\n", - "lee2017_dorsal_P45_VH_vs_DH\n", - "\n", - "\n", - "\n", - "*** FSTL4 ***\n", - "lee2017_dorsal\n", - "\n", - "\n", - "\n", - "*** TNNT2 ***\n", - "\n", - "\n", - "\n", - "*** A_32_P121537 ***\n", - "\n", - "\n", - "\n", - "*** A_24_P401842 ***\n", - "\n", - "\n", - "\n", - "*** DGKI ***\n", - "lee2017_dorsal\n", - "\n", - "\n", - "\n", - "*** C1orf133 ***\n", - "\n", - "\n", - "\n", - "*** RP11-291L15.2 ***\n", - "\n", - "\n", - "\n", - "*** NTN1 ***\n", - "\n", - "\n", - "\n", - "*** OSBPL3 ***\n", - "\n", - "\n", - "\n", - "*** TPBG ***\n", - "Christensen_v\n", - "lee2017_ventral_P14_VH_vs_DH\n", - "lee2017_dorsal_P14_VH_vs_DH\n", - "lee2017_ventral_P45_VH_vs_DH\n", - "lee2017_dorsal_P45_VH_vs_DH\n", - "\n", - "\n", - "\n", - "*** BNC2 ***\n", - "\n", - "\n", - "\n", - "*** BDKRB1 ***\n", - "\n", - "\n", - "\n", - "*** RGMA ***\n", - "\n", - "\n", - "\n", - "*** GRHL2 ***\n", - "\n", - "\n", - "\n", - "*** RP11-561O23.6 ***\n", - "\n", - "\n", - "\n", - "*** HHIP ***\n", - "\n", - "\n", - "\n", - "*** SERTAD4 ***\n", - "lee2017_dorsal\n", - "Cembrowski2013_dorsal\n", - "\n", - "\n", - "\n", - "*** A_32_P11262 ***\n", - "\n", - "\n", - "\n", - "*** NPNT ***\n", - "\n", - "\n", - "\n", - "*** SERPINF1 ***\n", - "\n", - "\n", - "\n", - "*** KLK7 ***\n", - "\n", - "\n", - "\n", - "*** RSPH9 ***\n", - "lee2017_ventral\n", - "lee2017_ventral_P14_VH_vs_DH\n", - "lee2017_dorsal_P14_VH_vs_DH\n", - "lee2017_ventral_P28_VH_vs_DH\n", - "lee2017_dorsal_P28_VH_vs_DH\n", - "lee2017_ventral_P45_VH_vs_DH\n", - "lee2017_dorsal_P45_VH_vs_DH\n", - "\n", - "\n", - "\n", - "*** LYPD1 ***\n", - "lee2017_ventral\n", - "lee2017_ventral_P14_VH_vs_DH\n", - "lee2017_dorsal_P14_VH_vs_DH\n", - "lee2017_ventral_P28_VH_vs_DH\n", - "lee2017_dorsal_P28_VH_vs_DH\n", - "lee2017_ventral_P45_VH_vs_DH\n", - "lee2017_dorsal_P45_VH_vs_DH\n", - "\n", - "\n", - "\n", - "*** GABRQ ***\n", - "lee2017_ventral_P14_VH_vs_DH\n", - "lee2017_dorsal_P14_VH_vs_DH\n", - "lee2017_ventral_P28_VH_vs_DH\n", - "lee2017_dorsal_P28_VH_vs_DH\n", - "\n", - "\n", - "\n", - "*** PYDC1 ***\n", - "\n", - "\n", - "\n", - "*** SYTL2 ***\n", - "\n", - "\n", - "\n", - "*** RP13-102H20.1 ***\n", - "\n", - "\n", - "\n", - "*** SSTR1 ***\n", - "Oreilly_V\n", - "Christensen_v\n", - "lee2017_ventral\n", - "lee2017_ventral_P14_VH_vs_DH\n", - "lee2017_dorsal_P14_VH_vs_DH\n", - "lee2017_ventral_P28_VH_vs_DH\n", - "lee2017_dorsal_P28_VH_vs_DH\n", - "lee2017_ventral_P45_VH_vs_DH\n", - "lee2017_dorsal_P45_VH_vs_DH\n", - "\n", - "\n", - "\n", - "*** LXN ***\n", - "\n", - "\n", - "\n", - "*** TMEM215 ***\n", - "\n", - "\n", - "\n", - "*** C1QL1 ***\n", - "\n", - "\n", - "\n", - "*** PIRT ***\n", - "\n", - "\n", - "\n", - "*** KCNG1 ***\n", - "lee2017_ventral_P45_VH_vs_DH\n", - "lee2017_dorsal_P45_VH_vs_DH\n", - "Cembrowski2013_ventral\n", - "\n", - "\n", - "\n", - "*** AQP3 ***\n", - "\n", - "\n", - "\n", - "*** A_23_P213527 ***\n", - "\n", - "\n", - "\n", - "*** RP11-45B20.3 ***\n", - "\n", - "\n", - "\n", - "*** C1orf187 ***\n", - "\n", - "\n", - "\n", - "*** GPR39 ***\n", - "lee2017_ventral\n", - "lee2017_ventral_P14_VH_vs_DH\n", - "lee2017_dorsal_P14_VH_vs_DH\n", - "lee2017_ventral_P28_VH_vs_DH\n", - "lee2017_dorsal_P28_VH_vs_DH\n", - "lee2017_ventral_P45_VH_vs_DH\n", - "lee2017_dorsal_P45_VH_vs_DH\n", - "\n", - "\n", - "\n", - "*** OPRK1 ***\n", - "\n", - "\n", - "\n", - "*** CD36 ***\n", - "\n", - "\n", - "\n", - "*** MYB ***\n", - "\n", - "\n", - "\n", - "*** GPR26 ***\n", - "\n", - "\n", - "\n", - "*** LMO1 ***\n", - "lee2017_ventral_P14_VH_vs_DH\n", - "lee2017_dorsal_P14_VH_vs_DH\n", - "lee2017_ventral_P28_VH_vs_DH\n", - "lee2017_dorsal_P28_VH_vs_DH\n", - "\n", - "\n", - "\n", - "*** GPR88 ***\n", - "\n", - "\n", - "\n", - "*** VGLL3 ***\n", - "\n", - "\n", - "\n", - "*** NR2F2 ***\n", - "Oreilly_V\n", - "Leonardo_2006_ventral_top1p\n", - "lee2017_ventral\n", - "lee2017_ventral_P14_VH_vs_DH\n", - "lee2017_dorsal_P14_VH_vs_DH\n", - "lee2017_ventral_P28_VH_vs_DH\n", - "lee2017_dorsal_P28_VH_vs_DH\n", - "lee2017_ventral_P45_VH_vs_DH\n", - "lee2017_dorsal_P45_VH_vs_DH\n", - "Cembrowski2013_ventral\n", - "\n", - "\n", - "\n", - "*** GPR83 ***\n", - "lee2017_ventral_P28_VH_vs_DH\n", - "lee2017_dorsal_P28_VH_vs_DH\n", - "lee2017_ventral_P45_VH_vs_DH\n", - "lee2017_dorsal_P45_VH_vs_DH\n", - "\n", - "\n", - "\n", - "*** GDA ***\n", - "Oreilly_V\n", - "Leonardo_2006_ventral_top1p\n", - "lee2017_ventral\n", - "lee2017_ventral_P14_VH_vs_DH\n", - "lee2017_dorsal_P14_VH_vs_DH\n", - "lee2017_ventral_P28_VH_vs_DH\n", - "lee2017_dorsal_P28_VH_vs_DH\n", - "lee2017_ventral_P45_VH_vs_DH\n", - "lee2017_dorsal_P45_VH_vs_DH\n", - "\n", - "\n", - "\n", - "*** NRG1 ***\n", - "\n", - "\n", - "\n", - "*** RSPO2 ***\n", - "Cembrowski2013_ventral\n", - "\n", - "\n", - "\n", - "*** A_32_P136776 ***\n", - "\n", - "\n", - "\n", - "*** C20orf103 ***\n", - "\n", - "\n", - "\n", - "*** SYTL1 ***\n", - "\n", - "\n", - "\n", - "*** LGALS2 ***\n", - "\n", - "\n", - "\n", - "*** KIAA1772 ***\n", - "\n", - "\n", - "\n", - "*** A_32_P115840 ***\n", - "\n", - "\n", - "\n", - "*** SEMA3D ***\n", - "\n", - "\n", - "\n", - "*** GEFT ***\n", - "\n", - "\n", - "\n" - ] - } - ], - "source": [ - "me = pandas.read_csv('/Users/jakevogel/Science/AHBA/MAIN_model_genes_of_importance.csv',\n", - " index_col=0)\n", - "set1 = me[me.step==1].gene_symbol.unique()\n", - "for gene in set1:\n", - " print('***',gene,'***')\n", - " for col in studies.columns:\n", - " slist = [str(x).upper() for x in studies[col].values if pandas.notnull(x)]\n", - " if gene.upper() in slist:\n", - " print(col)\n", - " print('\\n\\n')" - ] - }, - { - "cell_type": "code", - "execution_count": 163, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "*** NPTX1 *** \n", - "['Cembrowski2017_V'] \n", - "\n", - "\n", - "*** MEGF11 *** \n", - "['Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** ATP2B4 *** \n", - "['lee2017_ventral', 'Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** PLXNC1 *** \n", - "['Leonardo_2006_ventral_top1p', 'Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** MARCH1 *** \n", - "['lee2017_ventral'] \n", - "\n", - "\n", - "*** MET *** \n", - "['Leonardo_2006_ventral_top1p', 'Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** HTR3A *** \n", - "['Christensen_v', 'lee2017_ventral'] \n", - "\n", - "\n", - "*** GRP *** \n", - "['Dong', 'lee2017_ventral', 'Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** HR *** \n", - "['lee2017_dorsal'] \n", - "\n", - "\n", - "*** KCNK2 *** \n", - "['Leonardo_2006_dorsal_top1p'] \n", - "\n", - "\n", - "*** HTR2C *** \n", - "['Dong', 'Leonardo_2006_ventral_top1p', 'lee2017_ventral', 'Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** KCNH5 *** \n", - "['lee2017_dorsal'] \n", - "\n", - "\n", - "*** HDC *** \n", - "['Christensen_d', 'lee2017_dorsal'] \n", - "\n", - "\n", - "*** USP2 *** \n", - "['lee2017_dorsal'] \n", - "\n", - "\n", - "*** CARTPT *** \n", - "['lee2017_dorsal'] \n", - "\n", - "\n", - "*** IGFBP5 *** \n", - "['lee2017_ventral'] \n", - "\n", - "\n", - "*** DIO3 *** \n", - "['Dong', 'lee2017_ventral', 'Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** JUN *** \n", - "['Leonardo_2006_ventral_top1p', 'Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** PTPRG *** \n", - "['Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** GALR1 *** \n", - "['lee2017_ventral'] \n", - "\n", - "\n", - "*** DGKG *** \n", - "['lee2017_ventral'] \n", - "\n", - "\n", - "*** ETV1 *** \n", - "['Leonardo_2006_ventral_top1p', 'Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** TRHR *** \n", - "['lee2017_ventral'] \n", - "\n", - "\n", - "*** PENK *** \n", - "['Christensen_v', 'lee2017_ventral', 'Cembrowski2013_dorsal'] \n", - "\n", - "\n", - "*** NECAB1 *** \n", - "['lee2017_ventral', 'Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** COCH *** \n", - "['Thompson'] \n", - "\n", - "\n", - "*** SLC26A4 *** \n", - "['Leonardo_2006_ventral_top1p'] \n", - "\n", - "\n", - "*** SLIT2 *** \n", - "['Cembrowski2017_V', 'Thompson', 'Leonardo_2006', 'Leonardo_2006_ventral', 'Leonardo_2006_ventral_top1p', 'lee2017_ventral', 'Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** FXYD6 *** \n", - "['Christensen_v', 'Leonardo_2006_ventral_top1p', 'lee2017_ventral', 'Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** CYP26B1 *** \n", - "['Oreilly_D', 'lee2017_dorsal'] \n", - "\n", - "\n", - "*** SEMA3C *** \n", - "['lee2017_dorsal'] \n", - "\n", - "\n", - "*** CADM1 *** \n", - "['Oreilly_V', 'lee2017_ventral', 'Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** S100A10 *** \n", - "['Leonardo_2006_ventral_top1p', 'Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** MYO16 *** \n", - "['lee2017_ventral'] \n", - "\n", - "\n", - "*** WIPF3 *** \n", - "['lee2017_dorsal', 'Cembrowski2013_dorsal'] \n", - "\n", - "\n" - ] - } - ], - "source": [ - "set2 = me[me.step==2].gene_symbol.unique()\n", - "set2 = list(set(set2) - set(set1))\n", - "set2_hits = {}\n", - "for gene in set2:\n", - " hits = []\n", - " for col in [x for x in studies.columns if '_P' not in x]:\n", - " slist = [str(x).upper() for x in studies[col].values if pandas.notnull(x)]\n", - " if gene.upper() in slist:\n", - " hits.append(col)\n", - " if len(hits)>0:\n", - " ap = me[me.gene_symbol==gene]['ant-post'].values[0]\n", - " print('***',gene,'***','<%s>'%ap)\n", - " print(hits,'\\n\\n')\n", - " set2_hits.update({gene: hits})" - ] - }, - { - "cell_type": "code", - "execution_count": 162, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "*** TOX *** \n", - "['Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** VAV3 *** \n", - "['lee2017_dorsal'] \n", - "\n", - "\n", - "*** CAMK2A *** \n", - "['Leonardo_2006_dorsal_top1p', 'lee2017_dorsal'] \n", - "\n", - "\n", - "*** LUM *** \n", - "['Christensen_v', 'lee2017_ventral'] \n", - "\n", - "\n", - "*** HOMER2 *** \n", - "['Leonardo_2006_dorsal_top1p'] \n", - "\n", - "\n", - "*** PPAPDC1A *** \n", - "['lee2017_ventral'] \n", - "\n", - "\n", - "*** KCNF1 *** \n", - "['lee2017_ventral', 'Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** BACE2 *** \n", - "['lee2017_ventral'] \n", - "\n", - "\n", - "*** EPHA5 *** \n", - "['lee2017_dorsal'] \n", - "\n", - "\n", - "*** ACVR1C *** \n", - "['Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** KCNJ16 *** \n", - "['lee2017_ventral'] \n", - "\n", - "\n", - "*** PID1 *** \n", - "['Cembrowski2013_dorsal'] \n", - "\n", - "\n", - "*** CAMK4 *** \n", - "['lee2017_dorsal'] \n", - "\n", - "\n", - "*** KL *** \n", - "['Leonardo_2006', 'Leonardo_2006_dorsal', 'Leonardo_2006_ventral', 'Leonardo_2006_dorsal_top1p', 'Cembrowski2013_dorsal'] \n", - "\n", - "\n", - "*** CRIM1 *** \n", - "['lee2017_dorsal'] \n", - "\n", - "\n", - "*** NTS *** \n", - "['Oreilly_D', 'Cembrowski2017_D', 'lee2017_dorsal'] \n", - "\n", - "\n", - "*** SEPT9 *** \n", - "['Christensen_v'] \n", - "\n", - "\n", - "*** SLC13A5 *** \n", - "['lee2017_ventral'] \n", - "\n", - "\n", - "*** SYT13 *** \n", - "['Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** CPNE2 *** \n", - "['Leonardo_2006_ventral_top1p', 'lee2017_ventral', 'Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** LNX1 *** \n", - "['Leonardo_2006_ventral_top1p'] \n", - "\n", - "\n", - "*** EPHA7 *** \n", - "['Thompson', 'Leonardo_2006', 'Leonardo_2006_dorsal', 'Leonardo_2006_ventral', 'Leonardo_2006_dorsal_top1p', 'lee2017_dorsal', 'Cembrowski2013_dorsal'] \n", - "\n", - "\n", - "*** RAB3B *** \n", - "['Christensen_v', 'lee2017_ventral', 'Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** MATN2 *** \n", - "['Thompson', 'Dong'] \n", - "\n", - "\n", - "*** ARNT2 *** \n", - "['lee2017_ventral'] \n", - "\n", - "\n", - "*** AK5 *** \n", - "['Leonardo_2006_ventral_top1p', 'Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** KCNC4 *** \n", - "['Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** PCP4 *** \n", - "['Leonardo_2006_ventral_top1p', 'Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** CD24 *** \n", - "['Christensen_v'] \n", - "\n", - "\n", - "*** PDE11A *** \n", - "['Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** VIP *** \n", - "['Leonardo_2006_ventral_top1p'] \n", - "\n", - "\n", - "*** KCNS2 *** \n", - "['lee2017_dorsal'] \n", - "\n", - "\n", - "*** MEX3B *** \n", - "['lee2017_dorsal'] \n", - "\n", - "\n", - "*** RASD2 *** \n", - "['Oreilly_D', 'lee2017_dorsal', 'Cembrowski2013_dorsal'] \n", - "\n", - "\n", - "*** PHACTR2 *** \n", - "['lee2017_ventral'] \n", - "\n", - "\n", - "*** CACNA1A *** \n", - "['lee2017_dorsal'] \n", - "\n", - "\n", - "*** TCERG1L *** \n", - "['lee2017_ventral'] \n", - "\n", - "\n", - "*** SOX5 *** \n", - "['Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** NKAIN4 *** \n", - "['Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** TIMP2 *** \n", - "['Oreilly_V', 'Christensen_v', 'lee2017_ventral', 'Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** KCNH7 *** \n", - "['lee2017_dorsal', 'Cembrowski2013_dorsal'] \n", - "\n", - "\n", - "*** CALB2 *** \n", - "['lee2017_ventral'] \n", - "\n", - "\n", - "*** NR2F1 *** \n", - "['lee2017_ventral'] \n", - "\n", - "\n", - "*** PDE2A *** \n", - "['lee2017_dorsal'] \n", - "\n", - "\n", - "*** NELL1 *** \n", - "['lee2017_ventral'] \n", - "\n", - "\n", - "*** ARC *** \n", - "['Leonardo_2006_dorsal_top1p'] \n", - "\n", - "\n", - "*** NTF3 *** \n", - "['Oreilly_D'] \n", - "\n", - "\n", - "*** ST18 *** \n", - "['lee2017_ventral', 'Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** MUM1L1 *** \n", - "['lee2017_ventral'] \n", - "\n", - "\n", - "*** SNTG1 *** \n", - "['lee2017_ventral'] \n", - "\n", - "\n", - "*** NOV *** \n", - "['Dong', 'Leonardo_2006_ventral_top1p', 'Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** EDNRA *** \n", - "['lee2017_ventral'] \n", - "\n", - "\n", - "*** RGS8 *** \n", - "['Cembrowski2013_dorsal'] \n", - "\n", - "\n", - "*** KALRN *** \n", - "['lee2017_dorsal'] \n", - "\n", - "\n", - "*** TTC39C *** \n", - "['Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** WNT2 *** \n", - "['Leonardo_2006_dorsal_top1p'] \n", - "\n", - "\n", - "*** SYT2 *** \n", - "['Cembrowski2017_D'] \n", - "\n", - "\n", - "*** SEMA3E *** \n", - "['Thompson'] \n", - "\n", - "\n", - "*** PTTG1 *** \n", - "['Cembrowski2013_dorsal'] \n", - "\n", - "\n", - "*** LIMD2 *** \n", - "['lee2017_dorsal'] \n", - "\n", - "\n", - "*** FARP1 *** \n", - "['Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** HPCA *** \n", - "['Leonardo_2006_dorsal_top1p', 'Cembrowski2013_dorsal'] \n", - "\n", - "\n", - "*** LYZL4 *** \n", - "['lee2017_dorsal'] \n", - "\n", - "\n", - "*** SEMA4G *** \n", - "['Thompson', 'lee2017_ventral'] \n", - "\n", - "\n", - "*** HPCAL1 *** \n", - "['lee2017_ventral'] \n", - "\n", - "\n", - "*** NTNG2 *** \n", - "['Cembrowski2017_V'] \n", - "\n", - "\n", - "*** RASSF5 *** \n", - "['Leonardo_2006_ventral_top1p', 'lee2017_ventral'] \n", - "\n", - "\n", - "*** AMIGO2 *** \n", - "['Thompson'] \n", - "\n", - "\n", - "*** CACNG8 *** \n", - "['lee2017_dorsal', 'Cembrowski2013_dorsal'] \n", - "\n", - "\n", - "*** RIT2 *** \n", - "['Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** SAMD5 *** \n", - "['Cembrowski2013_dorsal'] \n", - "\n", - "\n", - "*** ITGBL1 *** \n", - "['Thompson', 'lee2017_ventral', 'Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** JDP2 *** \n", - "['Cembrowski2013_dorsal'] \n", - "\n", - "\n", - "*** PLEKHG1 *** \n", - "['Cembrowski2013_dorsal'] \n", - "\n", - "\n", - "*** CXCL12 *** \n", - "['Leonardo_2006_ventral_top1p'] \n", - "\n", - "\n", - "*** SLC17A6 *** \n", - "['Leonardo_2006_dorsal_top1p', 'lee2017_ventral'] \n", - "\n", - "\n", - "*** AK3L1 *** \n", - "['Leonardo_2006_ventral_top1p'] \n", - "\n", - "\n", - "*** FBLN2 *** \n", - "['Dong'] \n", - "\n", - "\n", - "*** RXRG *** \n", - "['Oreilly_V', 'Christensen_v', 'lee2017_ventral'] \n", - "\n", - "\n", - "*** MEST *** \n", - "['Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** ADRA1A *** \n", - "['Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** TSPAN9 *** \n", - "['lee2017_ventral', 'Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** GRIN3A *** \n", - "['Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** OLFML2B *** \n", - "['Oreilly_D', 'lee2017_dorsal'] \n", - "\n", - "\n", - "*** CALB1 *** \n", - "['Leonardo_2006_ventral_top1p'] \n", - "\n", - "\n", - "*** DPP10 *** \n", - "['lee2017_ventral'] \n", - "\n", - "\n", - "*** GPD1 *** \n", - "['Christensen_v'] \n", - "\n", - "\n", - "*** GPR125 *** \n", - "['Thompson'] \n", - "\n", - "\n", - "*** HTR2A *** \n", - "['lee2017_ventral'] \n", - "\n", - "\n", - "*** SLC16A2 *** \n", - "['lee2017_ventral', 'Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** SPARCL1 *** \n", - "['Cembrowski2013_ventral'] \n", - "\n", - "\n", - "*** KCNJ11 *** \n", - "['lee2017_dorsal'] \n", - "\n", - "\n" - ] - } - ], - "source": [ - "set3 = me[me.step==3].gene_symbol.unique()\n", - "set3 = list(set(set3) - set(set1))\n", - "set3 = list(set(set3) - set(set2))\n", - "set3_hits = {}\n", - "for gene in set3:\n", - " hits = []\n", - " for col in [x for x in studies.columns if '_P' not in x]:\n", - " slist = [str(x).upper() for x in studies[col].values if pandas.notnull(x)]\n", - " if gene.upper() in slist:\n", - " hits.append(col)\n", - " if len(hits)>0:\n", - " ap = me[me.gene_symbol==gene]['ant-post'].values[0]\n", - " print('***',gene,'***','<%s>'%ap)\n", - " print(hits,'\\n\\n')\n", - " set3_hits.update({gene: hits})" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "anaconda-cloud": {}, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}