diff --git a/publication_analysis/results/Fig_2b_Table_S5_inputs_Fig_3_S4_analysis_hsmp_proteome_predictions.ipynb b/publication_analysis/results/Fig_2b_Table_S5_inputs_Fig_3_S4_analysis_hsmp_proteome_predictions.ipynb index cfc2011..a1eb361 100644 --- a/publication_analysis/results/Fig_2b_Table_S5_inputs_Fig_3_S4_analysis_hsmp_proteome_predictions.ipynb +++ b/publication_analysis/results/Fig_2b_Table_S5_inputs_Fig_3_S4_analysis_hsmp_proteome_predictions.ipynb @@ -147,7 +147,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "gold standard has 33042 pairs\n", + "gold standard has 32504 pairs\n", "filtered gold standard has 31888 pairs\n", "\n" ] @@ -184,32 +184,32 @@ "output_type": "stream", "text": [ "HT-VIDAL\n", - " Reported interactions: 13944\n", + " Reported interactions: 13375\n", " Number of proteins: 4204\n", - " Number of true positives based on gold: 458\n", - " Number of gold possible to detect: 3465.0\n", - " Recall of gold: 0.13217893217893217 \n", + " Number of true positives based on gold: 420\n", + " Number of gold possible to detect: 3397.0\n", + " Recall of gold: 0.12363850456284957 \n", "\n", "HT-Mann\n", " Reported interactions: 27963\n", " Number of proteins: 5351\n", " Number of true positives based on gold: 1060\n", - " Number of gold possible to detect: 11222.0\n", - " Recall of gold: 0.09445731598645518 \n", + " Number of gold possible to detect: 11010.0\n", + " Recall of gold: 0.09627611262488647 \n", "\n", "HT-Mann HC\n", - " Reported interactions: 14240\n", + " Reported interactions: 12058\n", " Number of proteins: 4281\n", - " Number of true positives based on gold: 1293\n", - " Number of gold possible to detect: 11222.0\n", - " Recall of gold: 0.11522010336838354 \n", + " Number of true positives based on gold: 951\n", + " Number of gold possible to detect: 11010.0\n", + " Recall of gold: 0.08637602179836512 \n", "\n", "HT-GYGI\n", " Reported interactions: 56416\n", " Number of proteins: 10883\n", " Number of true positives based on gold: 1414\n", - " Number of gold possible to detect: 18233.0\n", - " Recall of gold: 0.07755169198705644 \n", + " Number of gold possible to detect: 17870.0\n", + " Recall of gold: 0.0791270285394516 \n", "\n" ] } @@ -344,7 +344,7 @@ "outputs": [ { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAJMAAAD3CAYAAAAZgGZZAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAV7UlEQVR4nO3de1gU570H8O8su8sddhW8YIwQI17BKiTNRZNgjFalNuk5PVITjQlWYm3SpuZ+muhplHhsG+sTc5IHTyKJ18SKgiFWgsbU+7UF1BhAwJhFRZBlLyx7m/f84YFE5b4vszPD7/M8efIw7M77uvPlndn3fWdegTHGQAgHGn9XgKgHhYlwQ2Ei3FCYCDcUJsKNVopCbDYbqqqqoNfrpSiO9BCXy4XY2FiEhYW1+ntJWqaqqiqYTCYpiiI9yGQyoaqqqs3fS9Iy6fV6xMXFIT4+XoriiJ/QNRPhhsJEuKEwEW4oTIQbChPhhsJEuKEwkU4TxfZnK1GYSIdMFi/+XubCrL9Z2n2dJJ2WAFBZWQkA1HGpIGaHiC+r3Hh6hxUhOgETowLafb1kYSLKUVrrwRtfNiLnayf6BAtY+lAIfntvCEpL69p9n2RhouEUeXN5GBwehi/OuzFvhwX/NjIQpzKMGNO/8xGhlqmXMjtEvFhgR4OTweIU8WWlGy4vEBEo4J3p4XhqXFCX90lh6oXqHSLm5FhxzSEiNV4PnUaLtyaHIiY8ANEhAjQaoVv7pTD1EkcuurGxuAnfWUR8XubC2AFa5D8eiehQfl/oqWtA5RxuhkWfWZGSbYbdDST21+L4AiOOLTByDRJALZNqebwMu8+78MoXdugCBPxroRHDo3r2cFOYVKLeIeKCWcRlm4iiyx58VNSE2kYRC5KC8fqDIQjUdu86qCsoTApVVe/FhQYvqswics85kV/qglYDRIVokNhfi4ykIGQkByNI1/MhakY94AqzuaQJO752Yfs5J6JCNBgUrsGkOB1OZBiR0IU+oZ5ALZNC2JwM/7nHjo+LmpCRHIQTC4xIHCCvw0c94ApwpsaDaRsa0C9UgwPpBozuJ68QNaOuARljjOGT0024/wMz0sYE4tiv5BskgE5zsmVzMjy6pQEnqz34nxlhmJ3Y9eENqVGYZKjWLuKZz6xodDOc/20f9AlRxgmEwiQjBeUuLN5tw9mrXiTHaLE9LUIxQQIoTLJQ7xCR+Y9GvHvcgWWTQvHzkYGINbY/EU2OKEx+5PYyvH3IgSX77BjTT4sj8+X3db8rlFtzhfvbGSd+nX99Ouzfn4jEQ3HKf0IMhUliZ2s8yNx/fUrse6nheDwhENoA6YY8epJkV3eVlZUoLS2VqjjZudjgxUsFNiRn1SMwQMCR+UY8+aMg1QQJoJapx11rFPFxURP+a18j7husw87ZkXj4DuWf0lpDwyk9KOuEAy8W2BHfNwDvpYYhLUH+HY++oJaJo+vDH06UXPHiXK0HhRVurP95OH46XA9BUM/prC0UJg4OfetGyRUPtp9z4mS1B9OH6TE8KgBvTgrFKBmPpfHWe/6lPeSbWg8mfWTG3YN0uG+wFuseDcfAcOV1OPJAYfKB28uQsdOGuWODkDUz3N/V8TsKUzeIIsNfjziwu9yFGruI7WkR/q6SLChnFFFG3vyqEW8fdmBElBYFcyJhDKaPEfCxZcrPz0dRURFqamqwatUqFBUVYc+ePSgpKUF2djanKsqHzcnwH1stOPCtG3uejMRdg3T+rpKs+BSmwsJCrFq1CmvWrEFpaSlGjBiBwsLCVlciKC0thcvlUmxfk1dkeDrXgWuNIs4uDEO/UAaXy+XvaknK5XK1u8qET+1z84ep1+uh0WgQFBSEF154AcHBwaipqfFl17JicTJM32THMZMHm38egn6c74RVC59apgceeADLly9HfX09IiIiUFlZieLiYoiiiKioqBteGx8fr8hWyeNlyMi3wM0E/GthHxh68fVRR2vf+BSmWbNm3bJtypQpvuxSNhhj+Kz0+u3VTR6Gg+nGXh2kzqBPpw3LvmrEnBwr5v0oCGcW9cGAcPqoOkL9TK347Bsn/nTIgX3zIjE+hr6xdRb9ud2EMYbX9zbitYkhFKQuopbpB9475sAbX9oBAIvuVvd0kZ5AYfp/H55yYHGBDR/+LBwPxuoQHkiNdldRmACsPeHA87ttyJkViZ8MU+csSCn0+jAd/c6N53bZkDc7Eo8MpSD5otffUPBqoR3P/jiYgsRBr74wOFntxqGLbiy+L8TfVVEFycIktxsKmtwMCz+zYf74IPQP69V/U9z0yk/R6WGYk2OB08Ow8pEwf1dHNXrdBfhxkxsL8mwQGcOuJyIRolf/XSNS6VUtk9vLkLbVgolDdDiYbkRMRO+c+N9Tek3LJIoM7x5zwCMCf54SCr0Ez8XubXpFmGpsIn66qQHnar344GfhFKQeouowMcbwjwtuzNtuxdgBWuydZ0AoXSP1GNWG6WyNB3NyrPi61oPXJobg1YkhCOjm0lekc1S5QsGpajdSshvw9LggfDkvEhFBvep7ht+ormW6ahcxY6MFv783GEtSQv1dnV5FVT3gXpFhQZ4V4wdq8cZDNEQiNdW0TIwxPL7NiqIrHhxMN/SKR9jIjWrC9OkZJ/ZWulC8kCb/+4sqPnWHm+HVQjuWPBhKQfIjVXzyrxbaYQjSYEEyzdv2J8Wf5k6Y3Hj/hAMnFhihU9GTa5VI0S0TYwwvFNjx67uCMcbPq0ASBbdMNifD7G0WlNV5sW0WPWxLDhQZJqeH4bEtDWh0M/zzGSP6KmjlIzVT3HCKV2R4YpsFNXYRXz1loIdJyIjiWqY39jbiRLUHh+ZTkORGUSsU7K1w4a9HGnEw3dhrH48sZ4r50/aKDL/fbcOrE0Pwo4GKa1B7BcWE6dMzTtTYGZ6/lwZw5UoRYWKMYeUBBxbfG0wzJWVMEWHaVOJEpdmLXyXRcImcyTpMosiw8kAjfpVnxQc/C6cZkzIn2ytZUWRIz7WisMKNgjkGTBhCT3GTO9mGafk/GvFFhRuH0g243UDdAEogyzCdMLmxfH8j9s2jICmJ7J7PxBjD4t12PHt3MO4ZTKc2JZFdy7Svyo1TlzzIoWW3FEd2d6e8e8yBBUlBNBNAgWR1xBpdDLvKXEhLCPR3VUg3yCpMu8+7EBWiQXKM7M6+pBNkFaaNxU3491GBdM+bQskmTN81eJF7zkV3mCiYbMK0+ogDk+J0GB5FpzilksWR++K8C2uOOfDVUwZ/V4X4gOuCz6dOncLu3btRXl6ODz/8sFP7YIzhuc9tWDYpFHffRp2UStZumHbt2gXGGABAEARMmzbtht/fvOBzUlISkpKSMHfu3Fv21daCz3sqPbhs8+LpsQG9bgFlpelowed2wxQdHX1DmFrbOfD9gs8AkJWVhSeffLLTFfzfU07MTdTTpDcV6LBl8ng8AICAgAAkJyff8PubF3wuKSlBbm4uUlJSkJSUBIPB0PLa1hZ8dnsZvqj0YvecMOj1dIqTu44WfBZYc9PThqamJly7dg2CIGDgwIHdqkTzAO/NYdp/wYWZmyy4+lJfaOk5AbLX1nFs1uEFeGZmJhwOB3Q6HTIzM7lWrqDcjYfv0FGQVKLDMA0fPhyCIODatWvcC99b6cKcsdRJqRYdhumRRx6BIAgoLy/nWrBXZPjnZQ/WzJBFVxfhoMMe8J07dyI6OhqnT5/mWvA3tV54RGB0NIVJLToMk8lkwuHDh3Hx4kWuBZ+85EFCPy0tPaEiHYZp4cKFqKmpwaJFi7gWfKragySaaqIqHYZp7dq1KC8vx/79+7kWfMzkxnh6ZoCqdBimmJgYDB06FDabzaeCfnhDwQWzF0dNHkynZd9Vpd0wlZeXw2AwIDc3F3a7nVuhG4ub8OAQHd3GpDLtnmc+/vhjmM1mJCYmYsqUKT4V9MMbCracdtLTTFSo3TAFBwdDr9fj4YcfRlNTE5cCrzWKKLnixeQ7aCxObdoN0+jRo+H1enH+/HkIgoCEhASfCzz8nRuDIzUYHEmnOLVpN0wzZ87kXuChbz24n+7UVSXJ54AfvOjG/bdTmNRI0jAxxnDqkgd3DaL+JTWSNEzVVhFWJ8OoaLpeUiNJw3T2qhe3RWgQHiibO6wIR5KuULDfGYhR0ZFSFUkkJmkTUd4QgJE05US1JH2kTrU7jK6XVEzSlulcrQcjoihMaiVZmBpdDFftDHFGCpNaSRamSzYvtBoghhZkVi3JjqzJImJwpAYBGpqmq1aShmkIDe6qmmRhqrZ6EWugU5yaSdcyWRmG0MxKVZOuZbJ4KUwqJ1mYLtS7EWCtlqo44geShcnuFmAIbPeBK0ThJAuTCAF3xg3xeT/s0lawg3eDFURc//+lrRxqR3iQbNTVwwCtj9Fll7YCxfO+32A7AxTPAwMgDPyFbzsnPpOsZWIMvndYVvypje1/9m2/hAtJO358bZlgP9e17URSygpT6IiubSeSkjhMPp7m7nixje0v+LZfwoWiWiZh4C+AxGwgbAwgaK//PzGbLr5lQtI5tDyegyoM/AVA4ZElSVumygq+z8Uk8iJpy+TzBTiAEnMh9tesx9WmKkQHxWJivzlIMEz2fcfEZ5KGaWT8MJ/eX2IuxLZvl7b8XNN0vuVnCpT/SXqaC/CxtP0161vdfqCN7URakoVJEFpfzKcrrjZVdWk7kZZkYeLxhObooNgubSfSkixMvp7iAGBivzmtbp/QxnYiLckuwHncldJ8kX3gB9/mJtC3OdmQLkyc7nBKMEym8MiUok5zRN58OsT5+fnIzMzE7373OzDGYDKZ8MwzzyAnJ+eW1/LosCTy5tNp7uYFn+Pj45GWloba2tpbXss8Hpw+fbrNVRSJ/Pm04HNndg58v+Bze/1IGg3dTCBHZyx7cahuE2qdVYgKjMV9fWdjdMSkbu3LpzDdvOBzWFgYtm/fDrvdjpSUFPTt27fltUF6PcaMGeNLcYSzEnMhcquXtfx81VmB3Opl0Gq1rX7J6WjBZ5/CNGvWrFu2rV69uvWC6JpJdtobnurON2bpvs3Rw09kh/fwlIRdA5QmueE9PCXd2Byd5mSH9/CUZD3gGgqT7PAenpIsTLSuszzxHJ6iaybCjXQtE6fY0hxw+ZKsZXI1OVoWfO6u5jngNU3nweBtmQNeYi7kUkfiG8nCxOMsR3PA5U2yMIWHhvg8yEtzwOVNUfOZaA64vEl4Q4Hv5zmaAy5viuq0pDng8qa4rgGaAy5fNGuAcKOoC3AibzRrgHAj3UPlrVafe8CJvEnXA04tk+pJdoj7REbQbU4qR9dMhBuaz0S4oX4mwg31MxFu6DRHuFHUYwiJvClqpiWRNwm7BihNaidZmMz1tTSconL0bY5wI9kh7h8dTcMpKkedloQbChPhhq6ZCDfUNUC4odMc4YZOc4QbChPhRrJDXFtzhXrAVY4Gegk3koXptpgB1AOucvRtjnBDF+CEG0U9n4nIG7VMhBsKE+FGskMcHUJpUjvJjnBEEIVJ7bg/hjA/Px9FRUWoqanBqlWrIAgCKisrAYBbP1NzTzrPfivap+/75B6mmxeBHj58ONxuN4qKiuB2uxEbGwuTyQQAGDRoEAB0++fmNYJ92Udb++zo9T2xz66U0d19tldGR/tsPn5tERhjXFdiXrRoEd59911kZWUhJSUFw4YNg81mw759+6DT6RAXF8ezOCIhl8uF2NhYhIWFtfp77mH65JNPUF5eDofDgWXLlnX8BqIa3MPUmtauo3x5/8mTJ5Gbmwuz2YwVK1Zg06ZNOHjwILKzs3vmH8Chju+//z6+/vprPPvssxg7dqxs6zl16lSkpqZi7ty5iImJ6dK+JfmKVVhYiNdeew133nlnt6ah3Pz+LVu2YOnSpZg6dSoOHDiA+fPnt9n0SqWjOo4cORJXr15FdHS0rOs5bty4G65Fu0KSMDVXTq/XQ9ONh1ve/H63233Dz11t6XpCR3WcPn06lixZgg0bNvizmh3W85133sFjjz2GzZs3d3nfkpzmfL2Oan5/fX09kpOTERMTg127dkEURSxduhTbtm3D2rVrkZmZifvvv78H/gW+1fG5557DunXrUFdXh7lz52LcuHF+qWNn6rl582aYTCZkZGRgxIgRXdq3JGEivQN1SxNuKEyEGwoT4YbCRLiRbL05tcnNzUVJSQkMBgNOnz6Nu+66C01NTVi0aBEWL16MhIQElJeX96pRAApTN3m9XqSmpiIqKgp6vR7p6enIzs5GWVkZvF4vqqurcfvtt/u7mpKi05wPPv/8c+zYseOGbTqdDhEREXjxxRdbpt70FtQydZMgCJgxYwb69euHJUuWQKPRgDGG2NhYBAQEQKfTwWg0ora2FlFRUf6uriSo05JwQ6c5wg2FiXBDYSLcUJgINxQmwg2FiXBDYSLcqDpMR44c8en9IhNRajmMr65ko9RyGCITOdWMH8ZEsKt/Bzv/39f/78c6KqLTcseOHRg2bBhGjx6NjIwMDB06FIMHD8Yvf/lLTJw4ERs2bMCQIUOwceNGVFRU4PXXX0ddXR127NgBp9MJALhy5UpLT3VniEzEJxdewzeWAy3bhkdMwKwhmdAIre9j/fr1EAQBFoulzTKXLl2KwYMHIz09HdXV1XjiiSewd+9evPnmm+jfvz9mzpyJAQMGdKqOjInAP9OAq/nfb4yeAYzbAqGVOi5fvhzPP/88XnnlFcTHx6OiogIrVqzAypUr8Yc//AHHjx9HfX09pkyZ0qnybyZpy2R2iKi2eDv8z+y48a9LFEXk5OQgKysLLpcLoihi6tSpAIAJEyYgNzcXLpcLDQ0NLQdu586dSE1NhdfrRVpaGsLDw1sOcmeUW4/eECQA+MZyAOXWo62+3ul0QqvVoqGhAR6Pp80yNRoNGhoa4HK5kJeXhwkTJgAAGhoacM8993Q6SACA2oIbgwRc/7m2oNWXz549G3/5y18wfvx4/OY3v0F4eDi8Xi/q6uqQlZWFvLw8iGL3WzbJxuY8XoYhq67B4uy4IYwIFFD3cl9of/DswkcffRQJCQm4fPkynnrqKfzxj3/EihUrEBwcDI/Hg61bt2Lq1KnYsmULgOutQv/+/QFcn0QfGxsLg8HQ6fpecnzT6vbLjlLER9x7y/aCggKYzWaYzWacPXsWOp0OsbGxKC4uRllZGQDgtttuAwBMnjwZn376KQRBQGBgIADgrbfewvr162EymTBt2rTOVdJS1Mb2YiD6J7dsjouLg9lsxksvvYQ9e/YgOTkZwcHBMBqNWLBgAY4ePYr6+vrOld0KycKkDRBw4fk+aHR3HKYQnXBDkARBaGlxysrKkJeXhz59+kCn00Gj0SA1NRVvv/02Hn/8cWg0GhQVFSExMbHlvWlpaTAajV2q78Dg4a1uHxDc+sMdzp49i5dffhlerxejRo3CmjVrWsp84IEHWl53/PhxJCYmYuXKlVi5ciU++ugjWK1WbNy4EVarFePHj+98JSPauJkzIrHNtxiNRpw7dw7r1q1DSkoKTCZTy2f7w8+5OxRxzdRVq1evxsKFC6HX67u9j+5cM0mtq9dMPU2VYeJFZCLKrUdx2VGKAcHxuDP8x7IJUjPGxOvXSJbi6y1S1BS/BAmgMBGO5PVnRhSNwkS4oTARbihMhBsKE+GGwkS4oTARbihMhJv/A7TeumRPGcnfAAAAAElFTkSuQmCC\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAJMAAAD3CAYAAAAZgGZZAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAV5ElEQVR4nO3de1gU190H8O8s7HK/rIIXjBFixCsYhaS5aFKM0arUJn3fvlITLwlWYm3Spub+NtE2SnxtG+sT0+bBt5HEa2JFwRArQWPq/doCagwgYMyiIsKyF5a9zXn/8IVGBRbYw+zM7O/zPHnyMOzOOe58OXP2zJk5AmOMgRAONL6uAFEPChPhhsJEuKEwEW4oTISbQCkKsVgsqKmpgU6nk6I40kscDgfi4+MRHh7e7u8laZlqampgMBikKIr0IoPBgJqamg5/L0nLpNPpkJCQgMTERCmKIz5CfSbCDYWJcENhItxQmAg3FCbCDYWJcENhIl0mip3PVqIwEY8MJjf+XuHArL+ZOn2dJIOWAFBdXQ0ANHCpIEabiC9qnHhmpxmhWgETYwI6fb1kYSLKUV7vwptfNCPvKzv6hAhY9v1Q/PKBUJSXX+/0fZKFiS6nyJvDxWBzMXx+wYn5O034j5FBOJ2lx5j+XY8ItUx+ymgT8VKRFU12BpNdxBfVTjjcQGSQgHenR+DpccHd3ieFyQ812kTMyTOjwSYiPVEHrSYQb08OQ1xEAGJDBWg0Qo/2S2HyE0cvObGptAXfmkR8VuHA2AGBKHwyCrFh/L7Q09CAytmcDIs/NSMt1wirE0juH4gTC/U4vlDPNUgAtUyq5XIz7LngwKufW6ENEPCvRXoMj+ndw01hUolGm4iLRhFXLCJKrrjwYUkL6ptFLEwJwRuPhCIosGf9oO6gMClUTaMbF5vcqDGKyD9vR2G5A4EaICZUg+T+gchKCUZWagiCtb0folY0Aq4wW8pasPMrB3actyMmVINBERpMStDiZJYeSd0YE+oN1DIphMXO8N97rfiopAVZqcE4uVCP5AHyOnw0Aq4AZ+tcmLaxCf3CNDiYGY3R/eQVolY0NCBjjDF8fKYFD/3ViIwxQTj+M/kGCaDTnGxZ7AyPb23CqVoX/jwjHLOTu395Q2oUJhmqt4p49lMzmp0MF37ZB31ClXECoTDJSFGlA0v2WHDumhupcYHYkRGpmCABFCZZaLSJyP5HM947YcPySWH48cggxOs7n4gmRxQmH3K6Gd45bMPS/VaM6ReIowvk93W/O5Rbc4X721k7fl54Yzrs35+KwvcTlP+EGAqTxM7VuZB94MaU2L+kR+DJpCAEBkh3yaM3Sda7q66uRnl5uVTFyc6lJjdeLrIgNacRQQECji7QY949waoJEkAtU69raBbxUUkLfru/GQ8O1mLX7Cg8epfyT2ntocspvSjnpA0vFVmR2DcAf0kPR0aS/AcevUEtE0c3Ln/YUXbVjfP1LhRXObHhxxH44XAdBEE9p7OOUJg4OPyNE2VXXdhx3o5TtS5MH6bD8JgAvDUpDKNkfC2NN//5l/aSr+tdmPShEfcN0uLBwYFY/3gEBkYob8CRBwqTF5xuhqxdFswdG4ycmRG+ro7PUZh6QBQZ/nTUhj2VDtRZRezIiPR1lWRBOVcRZeStL5vxzhEbRsQEomhOFPQh9DECXrZMhYWFKCkpQV1dHVavXo2SkhLs3bsXZWVlyM3N5VRF+bDYGf5rmwkHv3Fi77wo3DtI6+sqyYpXYSouLsbq1auxdu1alJeXY8SIESguLm53JYLy8nI4HA7FjjW5RYZn8m1oaBZxblE4+oUxOBwOX1dLUg6Ho9NVJrxqn1s/TJ1OB41Gg+DgYLz44osICQlBXV2dN7uWFZOdYfpmK44bXNjy41D043wnrFp41TI9/PDDWLFiBRobGxEZGYnq6mqUlpZCFEXExMTc9NrExERFtkouN0NWoQlOJuBfi/og2o/7R57WvvEqTLNmzbpt25QpU7zZpWwwxvBp+Y3bq1tcDIcy9X4dpK6gT6cDy79sxpw8M+bfE4yzi/tgQAR9VJ7QOFM7Pv3ajt8ftmH//CiMj6NvbF1Ff263YIzhjX3NeH1iKAWpm6hl+o6/HLfhzS+sAIDF96l7ukhvoDD9vw9O27CkyIIPfhSBR+K1iAiiRru7KEwA1p204YU9FuTNisIPhqlzFqQU/D5Mx7514vndFhTMjsJjQylI3vD7GwpeK7biue+FUJA48OuOwalaJw5fcmLJg6G+rooqSBYmud1Q0OJkWPSpBQvGB6N/uF//TXHjl5+i3cUwJ88Eu4th1WPhvq6OavhdB/yEwYmFBRaIjGH3U1EI1an/rhGp+FXL5HQzZGwzYeIQLQ5l6hEX6Z8T/3uL37RMosjw3nEbXCLwhylh0EnwXGx/4xdhqrOI+OHmJpyvd+OvP4qgIPUSVYeJMYZ/XHRi/g4zxg4IxL750QijPlKvUW2YztW5MCfPjK/qXXh9YihemxiKgB4ufUW6RpUrFJyudSIttwnPjAvGF/OjEBnsV98zfEZ1LdM1q4gZm0z49QMhWJoW5uvq+BVVjYC7RYaFBWaMHxiIN79Pl0ikppqWiTGGJ7ebUXLVhUOZ0X7xCBu5UU2YPjlrx75qB0oX0eR/X1HFp25zMrxWbMXSR8IoSD6kik/+tWIrooM1WJhK87Z9SfGnuZMGJ94/acPJhXpoVfTkWiVSdMvEGMOLRVb8/N4QjPHxKpBEwS2Txc4we7sJFdfd2D6LHrYlB4oMk93F8MTWJjQ7Gf75rB59FbTykZop7nKKW2R4arsJdVYRXz4dTQ+TkBHFtUxv7mvGyVoXDi+gIMmNolYo2FflwJ+ONuNQpt5vH48sZ4r503aLDL/eY8FrE0Nxz0DFNah+QTFh+uSsHXVWhhceoAu4cqWIMDHGsOqgDUseCKGZkjKmiDBtLrOj2ujGz1LocomcyTpMosiw6mAzflZgxl9/FEEzJmVOtj1ZUWTIzDejuMqJojnRmDCEnuImd7IN04p/NOPzKicOZ0bjzmgaBlACWYbppMGJFQeasX8+BUlJZPd8JsYYluyx4rn7QnD/YDq1KYnsWqb9NU6cvuxCHi27pTiyuzvlveM2LEwJppkACiSrI9bsYNhd4UBGUpCvq0J6QFZh2nPBgZhQDVLjZHf2JV0gqzBtKm3Bf44KonveFEo2Yfq2yY388w66w0TBZBOmNUdtmJSgxfAYOsUplSyO3OcXHFh73IYvn472dVWIF7gu+Hz69Gns2bMHlZWV+OCDD7q0D8YYnv/MguWTwnDfHTRIqWSdhmn37t1gjAEABEHAtGnTbvr9rQs+p6SkICUlBXPnzr1tXx0t+Ly32oUrFjeeGRvgdwsoK42nBZ87DVNsbOxNYWpv58C/F3wGgJycHMybN6/LFfzf03bMTdbRpDcV8NgyuVwuAEBAQABSU1Nv+v2tCz6XlZUhPz8faWlpSElJQXR0dNtr21vw2elm+LzajT1zwqHT0SlO7jwt+Cyw1qanAy0tLWhoaIAgCBg4cGCPKtF6gffWMB246MDMzSZce7kvAuk5AbLX0XFs5bEDnp2dDZvNBq1Wi+zsbK6VK6p04tG7tBQklfAYpuHDh0MQBDQ0NHAvfF+1A3PG0iClWngM02OPPQZBEFBZWcm1YLfI8M8rLqydIYuhLsKBxxHwXbt2ITY2FmfOnOFa8Nf1brhEYHQshUktPIbJYDDgyJEjuHTpEteCT112IalfIC09oSIew7Ro0SLU1dVh8eLFXAs+XetCCk01URWPYVq3bh0qKytx4MABrgUfNzgxnp4ZoCoewxQXF4ehQ4fCYrF4VdB3byi4aHTjmMGF6bTsu6p0GqbKykpER0cjPz8fVquVW6GbSlvwyBAt3cakMp2eZz766CMYjUYkJydjypQpXhX03RsKtp6x09NMVKjTMIWEhECn0+HRRx9FS0sLlwIbmkWUXXVj8l10LU5tOg3T6NGj4Xa7ceHCBQiCgKSkJK8LPPKtE4OjNBgcRac4tek0TDNnzuRe4OFvXHiI7tRVJcnngB+65MRDd1KY1EjSMDHGcPqyC/cOovElNZI0TLVmEWY7w6hY6i+pkaRhOnfNjTsiNYgIks0dVoQjSVcoOGAPwqjYKKmKJBKTtImobArASJpyolqSPlKn1hlO/SUVk7RlOl/vwogYCpNaSRamZgfDNStDgp7CpFaShemyxY1ADRBHCzKrlmRH1mASMThKgwANTdNVK0nDNIQu7qqaZGGqNbsRH02nODWTrmUyMwyhmZWqJl3LZHJTmFROsjBdbHQiwFwrVXHEByQLk9UpIDqo0weuEIWTLEwiBNydMESq4ogPSBYmFwMC6cucqkl2eBkDDViqnKRtBbVM6kZhItxIHCbvT3Ps8jawQ/eBFUXe+P/lbRxqRniQdNqjty0Tu7wNKJ3/7w2Ws0DpfDAAwsCfeLdz4jVJWyavn4Na9fsOtv/Byx0THiQNU3WVl8/FtJ7v3nYiKWV1wMNGdG87kZSkYRqZOMy7Hdz1UgfbX/Ruv4QLaftMXpYmDPwJkJwLhI8BhMAb/0/Opc63TEj2bU4Q2l/Mp9v7GfgTgMIjS5K1TPSEZvWTLEzenuKI/EkYJmqa1E66MFGWVI9Oc4Qbrw5xYWEhsrOz8atf/QqMMRgMBjz77LPIy8u77bU0Y0D9vBoauHXB58TERGRkZKC+vv621zKXC2fOnOlwFUXiG2dN+3D4+mbU22sQExSPB/vOxujISe2+1qsFnz25dcHnzsaRNBq6mUBuzpr2Ib92edvP1+xVbT93FKjOeBWmWxd8Dg8Px44dO2C1WpGWloa+ffu2vTZYp8OYMWO8KY5wdqRhS7vbjzZswbiYH9y23dOCz16FadasWbdtW7NmTfsFUZ9Jdq611HRruyc0NODHYoPju7XdE8UNWpYZi/Hn8nn4bekj+HP5PJQZi7ns1x9N7Den3e0TOtjuiWQXenmc5sqMxdj+zbK2n+taLrT9nBQ92fsC/EzrZ3awbgOutdQgNjgeE/rN6fFnKVmYNBzCdKBuQ7vbD9ZtoDD1UFL0ZG6fnaJmDfDuMBK+FNVn4t1hJHxJ1zJxKCk+7J52tw/pYDuRlmRhcrTY2hZ87qka67/a3X6xg+1EWpKFicfIAPWZ5E2yMEWEhXp9kZf6TPKmqPlMvAfZCF/SDVpyuDOF9yAb4UtRg5YA30E2wpeihgaIvNGsAcKNojrgRN7oNEe4ke6h8maz1yPgRN6kGwGnlkn1JDvEfaIi6TYnlaM+E+FGUfOZiLzROBPhhsaZCDd0miPcKOqGAiJvipppSeRNwqEBSpPaSRYmY2M9XU5ROfo2R7iR7BD3j42lyykqR4OWhBsKE+GG+kyEGxoaINzQaY5wQ6c5wg2FiXAj2SGur7tKI+AqRxd6CTeShemOuAE0Aq5y9G2OcEMdcMKNhDMtqWlSO2qZCDcUJsKNZIc4NpTSpHaSHeHIYAqT2nF/pmVhYSFKSkpQV1eH1atXQxAEVFdXAwC3cabWkXSe41a0T+/3yT1Mty4CPXz4cDidTpSUlMDpdCI+Ph4GgwEAMGjQIADo8c+tawR7s4+O9unp9b2xz+6U0dN9dlaGp322Hr+OCIwxrisxL168GO+99x5ycnKQlpaGYcOGwWKxYP/+/dBqtUhISOBZHJGQw+FAfHw8wsPD2/099zB9/PHHqKyshM1mw/Llyz2/gagG9zC1p71+lDfvP3XqFPLz82E0GrFy5Ups3rwZhw4dQm5ubu/8AzjU8f3338dXX32F5557DmPHjpVtPadOnYr09HTMnTsXcXFx3dq3JF+xiouL8frrr+Puu+/u0TSUW9+/detWLFu2DFOnTsXBgwexYMGCDpteqXiq48iRI3Ht2jXExsbKup7jxo27qS/aHZKEqbVyOp0Omh483PLW9zudzpt+7m5L1xs81XH69OlYunQpNm7c6Mtqeqznu+++iyeeeAJbtmzp9r4lOc15249qfX9jYyNSU1MRFxeH3bt3QxRFLFu2DNu3b8e6deuQnZ2Nhx56qBf+Bd7V8fnnn8f69etx/fp1zJ07F+PGjfNJHbtSzy1btsBgMCArKwsjRozo1r4lCRPxDzQsTbihMBFuKEyEGwoT4UayxQvVJj8/H2VlZYiOjsaZM2dw7733oqWlBYsXL8aSJUuQlJSEyspKv7oKQGHqIbfbjfT0dMTExECn0yEzMxO5ubmoqKiA2+1GbW0t7rzzTl9XU1J0mvPCZ599hp07d960TavVIjIyEi+99FLb1Bt/QS1TDwmCgBkzZqBfv35YunQpNBoNGGOIj49HQEAAtFot9Ho96uvrERMT4+vqSoIGLQk3dJoj3FCYCDcUJsINhYlwQ2Ei3FCYCDcUJsKNqsN09OhRr94vMhHlpiP48mouyk1HIDKRU834YUwEu/Z3sAv/c+P/PqyjIgYtd+7ciWHDhmH06NHIysrC0KFDMXjwYPz0pz/FxIkTsXHjRgwZMgSbNm1CVVUV3njjDVy/fh07d+6E3W4HAFy9erVtpLorRCbi44uv42vTwbZtwyMnYNaQbGiE9vexYcMGCIIAk8nUYZnLli3D4MGDkZmZidraWjz11FPYt28f3nrrLfTv3x8zZ87EgAEDulRHxkTgnxnAtcJ/b4ydAYzbCqGdOq5YsQIvvPACXn31VSQmJqKqqgorV67EqlWr8Jvf/AYnTpxAY2MjpkyZ0qXybyVpy2S0iag1uT3+Z7Td/NcliiLy8vKQk5MDh8MBURQxdepUAMCECROQn58Ph8OBpqamtgO3a9cupKenw+12IyMjAxEREW0HuSsqzcduChIAfG06iErzsXZfb7fbERgYiKamJrhcrg7L1Gg0aGpqgsPhQEFBASZMmAAAaGpqwv3339/lIAEA6otuDhJw4+f6onZfPnv2bPzxj3/E+PHj8Ytf/AIRERFwu924fv06cnJyUFBQAFHsecsm2bU5l5thyOoGmOyeG8LIIAHXX+mLwO88u/Dxxx9HUlISrly5gqeffhq/+93vsHLlSoSEhMDlcmHbtm2YOnUqtm7dCuBGq9C/f38ANybRx8fHIzo6usv1vWz7ut3tV2zlSIx84LbtRUVFMBqNMBqNOHfuHLRaLeLj41FaWoqKigoAwB133AEAmDx5Mj755BMIgoCgoCAAwNtvv40NGzbAYDBg2rRpXaukqaSD7aVA7A9u25yQkACj0YiXX34Ze/fuRWpqKkJCQqDX67Fw4UIcO3YMjY2NXSu7HZKFKTBAwMUX+qDZ6TlMoVrhpiAJgtDW4lRUVKCgoAB9+vSBVquFRqNBeno63nnnHTz55JPQaDQoKSlBcnJy23szMjKg1+u7Vd+BIcPb3T4gpP2HO5w7dw6vvPIK3G43Ro0ahbVr17aV+fDDD7e97sSJE0hOTsaqVauwatUqfPjhhzCbzdi0aRPMZjPGjx/f9UpGdnAzZ2Ryh2/R6/U4f/481q9fj7S0NBgMhrbP9rufc08oos/UXWvWrMGiRYug0+l6vI+e9Jmk1t0+U29TZZh4EZmISvMxXLGVY0BIIu6O+J5sgtSKMfFGH8lUeqNFipnikyABFCbCkbz+zIiiUZgINxQmwg2FiXBDYSLcUJgINxQmwg2FiXDzf1Tkmoc5LLxpAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] diff --git a/publication_analysis/results/scripts/utils.py b/publication_analysis/results/scripts/utils.py index db89afa..d47dcb3 100644 --- a/publication_analysis/results/scripts/utils.py +++ b/publication_analysis/results/scripts/utils.py @@ -60,12 +60,17 @@ def load_model_threshold_fpr(directory): mdict[k] = np.loadtxt(os.path.join(directory, f), skiprows=1, delimiter=',') return mdict -def load_prots(fname): +def load_prots(fname, deduplicate=True, strip_isoform=True): "Load protein pairs from csv file." prots2d = np.loadtxt(fname, delimiter=',', usecols=(0,1), dtype=str) - prots2d = strip_isoform_uniprotid(prots2d) + if strip_isoform: prots2d = strip_isoform_uniprotid(prots2d) prots1d = np.array([','.join(sorted(prot)) for prot in prots2d]) + if deduplicate: + _, m = np.unique(prots1d, return_index=True) + prots1d = prots1d[m] + prots2d = prots2d[m] + return prots1d, prots2d def strip_isoform_uniprotid(a):