diff --git a/review/2-1-stats/calc_stats.ipynb b/review/2-1-stats/calc_stats.ipynb new file mode 100644 index 0000000..f00fa5e --- /dev/null +++ b/review/2-1-stats/calc_stats.ipynb @@ -0,0 +1,1248 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "from os.path import dirname, realpath, join\n", + "base_dir = dirname(dirname(os.getcwd()))\n", + "import itertools\n", + "import pandas as pd\n", + "from os.path import join\n", + "base_dir\n", + "\n", + "sys.path.insert(0, base_dir)\n", + "from config_path import PROSTATE_DATA_PATH, PLOTS_PATH, GENE_PATH, PROSTATE_LOG_PATH\n", + "from data.data_access import Data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from utils.stats_utils_delong_xu import delong_roc_variance, delong_roc_test\n", + "from matplotlib import pyplot as plt\n", + "from utils.stats_utils import score_ci, pvalue, pvalue_stat\n", + "from sklearn import metrics\n", + "import numpy as np " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "def read_predictions():\n", + " all_models_dict = {}\n", + " base_dir = PROSTATE_LOG_PATH\n", + " models_base_dir = join(base_dir , 'compare/onsplit_ML_test')\n", + " models = ['Linear Support Vector Machine ', 'RBF Support Vector Machine ', 'L2 Logistic Regression', 'Random Forest',\n", + " 'Adaptive Boosting', 'Decision Tree']\n", + "\n", + " for i, m in enumerate(models):\n", + " df = pd.read_csv(join(models_base_dir, m + '_data_0_testing.csv'), sep=',', index_col=0, header=0)\n", + " all_models_dict[m] = df\n", + "\n", + " pnet_base_dir = join(base_dir , 'pnet/onsplit_average_reg_10_tanh_large_testing')\n", + " df_pnet = pd.read_csv(join(pnet_base_dir, 'P-net_ALL_testing.csv'), sep=',', index_col=0, header=0)\n", + " all_models_dict['P-net'] = df_pnet\n", + " return all_models_dict" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "all_models_dict = read_predictions()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "pnet_predictions = all_models_dict['P-net']\n", + "labels = pnet_predictions['y'].values.ravel()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
predpred_scoresy
01-087MM_BONE1.00.9466471
01-095N1_LN0.00.1279791
08-093J1_LN1.00.9907891
103621.00.4757960
AAPC-IP_LG-069-Tumor-SM-3NC720.00.1144040
\n", + "
" + ], + "text/plain": [ + " pred pred_scores y\n", + "01-087MM_BONE 1.0 0.946647 1\n", + "01-095N1_LN 0.0 0.127979 1\n", + "08-093J1_LN 1.0 0.990789 1\n", + "10362 1.0 0.475796 0\n", + "AAPC-IP_LG-069-Tumor-SM-3NC72 0.0 0.114404 0" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_models_dict['P-net'].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
predpred_scoresy
01-087MM_BONE10.7659491
01-095N1_LN00.2427261
08-093J1_LN10.9569511
1036210.8090660
AAPC-IP_LG-069-Tumor-SM-3NC7200.2382460
\n", + "
" + ], + "text/plain": [ + " pred pred_scores y\n", + "01-087MM_BONE 1 0.765949 1\n", + "01-095N1_LN 0 0.242726 1\n", + "08-093J1_LN 1 0.956951 1\n", + "10362 1 0.809066 0\n", + "AAPC-IP_LG-069-Tumor-SM-3NC72 0 0.238246 0" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_models_dict['Linear Support Vector Machine '].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "score_fun={}\n", + "score_fun['Accuracy'] = metrics.accuracy_score\n", + "score_fun['Precision'] = metrics.precision_score\n", + "score_fun['AUC'] = metrics.roc_auc_score\n", + "score_fun['F1'] = metrics.f1_score\n", + "score_fun['AUPR'] = metrics.average_precision_score\n", + "score_fun['Recall'] = metrics.recall_score\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def fdr(p_vals):\n", + "#https://stackoverflow.com/questions/25185205/calculating-adjusted-p-values-in-python\n", + " from scipy.stats import rankdata\n", + " ranked_p_values = rankdata(p_vals)\n", + " fdr = p_vals * len(p_vals) / ranked_p_values\n", + " fdr[fdr > 1] = 1\n", + "\n", + " return fdr" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "results=[]\n", + "pvalue_list=[]\n", + "delong_results=[]\n", + "for i, (model_name, predictions_df) in enumerate(all_models_dict.items()):\n", + " if model_name!='P-net':\n", + " pred2 = predictions_df['pred_scores'].values.ravel()\n", + " for func_name, func in score_fun.items():\n", + " \n", + " if func_name in ['AUC', 'AUPR']:\n", + " col_name= 'pred_scores'\n", + " else:\n", + " col_name= 'pred'\n", + " \n", + " pred_pnet= pnet_predictions[col_name].values.ravel()\n", + " pred_model= predictions_df[col_name].values.ravel()\n", + " \n", + " if func_name=='AUC':\n", + " pvalue_ = delong_roc_test(labels, pred_pnet, pred_model)\n", + " pvalue_delong = 10**pvalue_[0][0]/2\n", + " delong_results.append({'measure': 'AUC_DeLong', 'model':model_name, 'pvalue': pvalue_delong}) \n", + " \n", + " stat_fun= np.median\n", + " p, scores1, scores2, z = pvalue_stat( labels,pred_pnet,pred_model, func,n_bootstraps=2000, two_tailed=False, seed=1234, stat_fun=np.median)\n", + " med_pnet = stat_fun(scores1)\n", + " med_model = stat_fun(scores2)\n", + " stat_fun_diff = med_pnet - med_model\n", + " results.append({'measure': func_name, 'model':model_name, 'pvalue': p, 'model median': med_model, 'P-NET median':med_pnet, 'Median difference': stat_fun_diff }) \n", + " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "results_df = pd.DataFrame(results)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "results_df['pvalue_fdr_adjusted']= results_df.groupby('measure')['pvalue'].apply(fdr)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "results_df = results_df.set_index(['model', 'measure'])\n", + "results_df = results_df.round(3)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Median differenceP-NET medianmodel medianpvaluepvalue_fdr_adjusted
modelmeasure
RBF Support Vector MachineAUPR0.0210.8810.8600.2050.205
F10.0540.7550.7020.0880.177
AUC0.0130.9280.9150.2120.212
Recall0.1510.7630.6120.0020.003
Precision-0.0740.7500.8240.9271.000
Accuracy0.0100.8380.8280.3720.745
Decision TreeAUPR0.1400.8810.7410.0010.004
F10.0350.7550.7200.2070.248
AUC0.0720.9280.8560.0000.000
Recall0.1470.7630.6150.0010.003
Precision-0.1220.7500.8720.9781.000
Accuracy-0.0050.8380.8430.6020.722
L2 Logistic RegressionAUPR0.0680.8810.8130.0010.004
F10.0280.7550.7270.2320.232
AUC0.0450.9280.8830.0060.018
Recall0.0910.7630.6720.0660.066
Precision-0.0420.7500.7920.8261.000
Accuracy0.0050.8380.8330.4520.679
Adaptive BoostingAUPR0.0500.8810.8310.0540.080
F10.0500.7550.7050.1260.188
AUC0.0390.9280.8890.0190.038
Recall0.1950.7630.5680.0000.000
Precision-0.1800.7500.9300.9980.998
Accuracy-0.0050.8380.8430.6090.609
Linear Support Vector MachineAUPR0.0240.8810.8570.1870.224
F10.0600.7550.6950.0660.399
AUC0.0210.9280.9070.1260.151
Recall0.1510.7630.6120.0020.003
Precision-0.0570.7500.8070.8601.000
Accuracy0.0150.8380.8240.2981.000
Random ForestAUPR0.0580.8810.8230.0220.043
F10.0640.7550.6910.0750.225
AUC0.0330.9280.8950.0490.073
Recall0.1550.7630.6080.0040.004
Precision-0.0540.7500.8040.8501.000
Accuracy0.0150.8380.8240.3100.928
\n", + "
" + ], + "text/plain": [ + " Median difference P-NET median \\\n", + "model measure \n", + "RBF Support Vector Machine AUPR 0.021 0.881 \n", + " F1 0.054 0.755 \n", + " AUC 0.013 0.928 \n", + " Recall 0.151 0.763 \n", + " Precision -0.074 0.750 \n", + " Accuracy 0.010 0.838 \n", + "Decision Tree AUPR 0.140 0.881 \n", + " F1 0.035 0.755 \n", + " AUC 0.072 0.928 \n", + " Recall 0.147 0.763 \n", + " Precision -0.122 0.750 \n", + " Accuracy -0.005 0.838 \n", + "L2 Logistic Regression AUPR 0.068 0.881 \n", + " F1 0.028 0.755 \n", + " AUC 0.045 0.928 \n", + " Recall 0.091 0.763 \n", + " Precision -0.042 0.750 \n", + " Accuracy 0.005 0.838 \n", + "Adaptive Boosting AUPR 0.050 0.881 \n", + " F1 0.050 0.755 \n", + " AUC 0.039 0.928 \n", + " Recall 0.195 0.763 \n", + " Precision -0.180 0.750 \n", + " Accuracy -0.005 0.838 \n", + "Linear Support Vector Machine AUPR 0.024 0.881 \n", + " F1 0.060 0.755 \n", + " AUC 0.021 0.928 \n", + " Recall 0.151 0.763 \n", + " Precision -0.057 0.750 \n", + " Accuracy 0.015 0.838 \n", + "Random Forest AUPR 0.058 0.881 \n", + " F1 0.064 0.755 \n", + " AUC 0.033 0.928 \n", + " Recall 0.155 0.763 \n", + " Precision -0.054 0.750 \n", + " Accuracy 0.015 0.838 \n", + "\n", + " model median pvalue \\\n", + "model measure \n", + "RBF Support Vector Machine AUPR 0.860 0.205 \n", + " F1 0.702 0.088 \n", + " AUC 0.915 0.212 \n", + " Recall 0.612 0.002 \n", + " Precision 0.824 0.927 \n", + " Accuracy 0.828 0.372 \n", + "Decision Tree AUPR 0.741 0.001 \n", + " F1 0.720 0.207 \n", + " AUC 0.856 0.000 \n", + " Recall 0.615 0.001 \n", + " Precision 0.872 0.978 \n", + " Accuracy 0.843 0.602 \n", + "L2 Logistic Regression AUPR 0.813 0.001 \n", + " F1 0.727 0.232 \n", + " AUC 0.883 0.006 \n", + " Recall 0.672 0.066 \n", + " Precision 0.792 0.826 \n", + " Accuracy 0.833 0.452 \n", + "Adaptive Boosting AUPR 0.831 0.054 \n", + " F1 0.705 0.126 \n", + " AUC 0.889 0.019 \n", + " Recall 0.568 0.000 \n", + " Precision 0.930 0.998 \n", + " Accuracy 0.843 0.609 \n", + "Linear Support Vector Machine AUPR 0.857 0.187 \n", + " F1 0.695 0.066 \n", + " AUC 0.907 0.126 \n", + " Recall 0.612 0.002 \n", + " Precision 0.807 0.860 \n", + " Accuracy 0.824 0.298 \n", + "Random Forest AUPR 0.823 0.022 \n", + " F1 0.691 0.075 \n", + " AUC 0.895 0.049 \n", + " Recall 0.608 0.004 \n", + " Precision 0.804 0.850 \n", + " Accuracy 0.824 0.310 \n", + "\n", + " pvalue_fdr_adjusted \n", + "model measure \n", + "RBF Support Vector Machine AUPR 0.205 \n", + " F1 0.177 \n", + " AUC 0.212 \n", + " Recall 0.003 \n", + " Precision 1.000 \n", + " Accuracy 0.745 \n", + "Decision Tree AUPR 0.004 \n", + " F1 0.248 \n", + " AUC 0.000 \n", + " Recall 0.003 \n", + " Precision 1.000 \n", + " Accuracy 0.722 \n", + "L2 Logistic Regression AUPR 0.004 \n", + " F1 0.232 \n", + " AUC 0.018 \n", + " Recall 0.066 \n", + " Precision 1.000 \n", + " Accuracy 0.679 \n", + "Adaptive Boosting AUPR 0.080 \n", + " F1 0.188 \n", + " AUC 0.038 \n", + " Recall 0.000 \n", + " Precision 0.998 \n", + " Accuracy 0.609 \n", + "Linear Support Vector Machine AUPR 0.224 \n", + " F1 0.399 \n", + " AUC 0.151 \n", + " Recall 0.003 \n", + " Precision 1.000 \n", + " Accuracy 1.000 \n", + "Random Forest AUPR 0.043 \n", + " F1 0.225 \n", + " AUC 0.073 \n", + " Recall 0.004 \n", + " Precision 1.000 \n", + " Accuracy 0.928 " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results_df" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "results_df.to_csv('model_comparison_pvalue.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pvaluepvalue_fdr_adjusted
modelmeasure
RBF Support Vector MachineAUC_DeLong0.2100.210
Decision TreeAUC_DeLong0.0010.006
L2 Logistic RegressionAUC_DeLong0.0070.021
Adaptive BoostingAUC_DeLong0.0230.046
Linear Support Vector MachineAUC_DeLong0.1170.140
Random ForestAUC_DeLong0.0510.076
\n", + "
" + ], + "text/plain": [ + " pvalue pvalue_fdr_adjusted\n", + "model measure \n", + "RBF Support Vector Machine AUC_DeLong 0.210 0.210\n", + "Decision Tree AUC_DeLong 0.001 0.006\n", + "L2 Logistic Regression AUC_DeLong 0.007 0.021\n", + "Adaptive Boosting AUC_DeLong 0.023 0.046\n", + "Linear Support Vector Machine AUC_DeLong 0.117 0.140\n", + "Random Forest AUC_DeLong 0.051 0.076" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results_delong_df = pd.DataFrame(delong_results)\n", + "results_delong_df['pvalue_fdr_adjusted']= results_delong_df.groupby('measure')['pvalue'].apply(fdr)\n", + "results_delong_df = results_delong_df.set_index(['model', 'measure'])\n", + "results_delong_df=results_delong_df.round(3)\n", + "results_delong_df.to_csv('model_comparison_delong_pvalue.csv')\n", + "results_delong_df" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
measureAUCAUPRAccuracyF1PrecisionRecall
model
Adaptive Boosting0.01900.05350.60900.12550.99750.0000
Decision Tree0.00000.00100.60150.20700.97800.0010
L2 Logistic Regression0.00600.00100.45250.23250.82550.0660
Linear Support Vector Machine0.12550.18700.29850.06650.86000.0020
RBF Support Vector Machine0.21150.20500.37250.08850.92700.0015
Random Forest0.04850.02150.30950.07500.84950.0035
\n", + "
" + ], + "text/plain": [ + "measure AUC AUPR Accuracy F1 Precision \\\n", + "model \n", + "Adaptive Boosting 0.0190 0.0535 0.6090 0.1255 0.9975 \n", + "Decision Tree 0.0000 0.0010 0.6015 0.2070 0.9780 \n", + "L2 Logistic Regression 0.0060 0.0010 0.4525 0.2325 0.8255 \n", + "Linear Support Vector Machine 0.1255 0.1870 0.2985 0.0665 0.8600 \n", + "RBF Support Vector Machine 0.2115 0.2050 0.3725 0.0885 0.9270 \n", + "Random Forest 0.0485 0.0215 0.3095 0.0750 0.8495 \n", + "\n", + "measure Recall \n", + "model \n", + "Adaptive Boosting 0.0000 \n", + "Decision Tree 0.0010 \n", + "L2 Logistic Regression 0.0660 \n", + "Linear Support Vector Machine 0.0020 \n", + "RBF Support Vector Machine 0.0015 \n", + "Random Forest 0.0035 " + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "df = pd.DataFrame(results)\n", + "df_cross = pd.crosstab( df['model'], df['measure'], values=df.pvalue, aggfunc='first') \n", + "\n", + "df_cross" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "df_cross_fdr_adjusted = df_cross.copy()\n", + "for c in df_cross_fdr_adjusted.columns:\n", + " df_cross_fdr_adjusted[c] = fdr(df_cross_fdr_adjusted[c])" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
measureAUCAUPRAccuracyF1PrecisionRecall
model
Adaptive Boosting0.0380.0800.6090.1880.9980.000
Decision Tree0.0000.0040.7220.2481.0000.003
L2 Logistic Regression0.0180.0040.6790.2321.0000.066
Linear Support Vector Machine0.1510.2241.0000.3991.0000.003
RBF Support Vector Machine0.2120.2050.7450.1771.0000.003
Random Forest0.0730.0430.9280.2251.0000.004
\n", + "
" + ], + "text/plain": [ + "measure AUC AUPR Accuracy F1 Precision \\\n", + "model \n", + "Adaptive Boosting 0.038 0.080 0.609 0.188 0.998 \n", + "Decision Tree 0.000 0.004 0.722 0.248 1.000 \n", + "L2 Logistic Regression 0.018 0.004 0.679 0.232 1.000 \n", + "Linear Support Vector Machine 0.151 0.224 1.000 0.399 1.000 \n", + "RBF Support Vector Machine 0.212 0.205 0.745 0.177 1.000 \n", + "Random Forest 0.073 0.043 0.928 0.225 1.000 \n", + "\n", + "measure Recall \n", + "model \n", + "Adaptive Boosting 0.000 \n", + "Decision Tree 0.003 \n", + "L2 Logistic Regression 0.066 \n", + "Linear Support Vector Machine 0.003 \n", + "RBF Support Vector Machine 0.003 \n", + "Random Forest 0.004 " + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# df_cross_fdr_adjusted.applymap('{:.3f}'.format)\n", + "df_cross_fdr_adjusted = df_cross_fdr_adjusted.round(3)\n", + "df_cross_fdr_adjusted" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "df_cross_fdr_adjusted.to_csv('df_cross_fdr_adjusted.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:min_env]", + "language": "python", + "name": "conda-env-min_env-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.15" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}