diff --git a/review/2-1-stats/calc_stats.ipynb b/review/2-1-stats/calc_stats.ipynb
new file mode 100644
index 0000000..f00fa5e
--- /dev/null
+++ b/review/2-1-stats/calc_stats.ipynb
@@ -0,0 +1,1248 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import sys\n",
+ "import os\n",
+ "from os.path import dirname, realpath, join\n",
+ "base_dir = dirname(dirname(os.getcwd()))\n",
+ "import itertools\n",
+ "import pandas as pd\n",
+ "from os.path import join\n",
+ "base_dir\n",
+ "\n",
+ "sys.path.insert(0, base_dir)\n",
+ "from config_path import PROSTATE_DATA_PATH, PLOTS_PATH, GENE_PATH, PROSTATE_LOG_PATH\n",
+ "from data.data_access import Data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from utils.stats_utils_delong_xu import delong_roc_variance, delong_roc_test\n",
+ "from matplotlib import pyplot as plt\n",
+ "from utils.stats_utils import score_ci, pvalue, pvalue_stat\n",
+ "from sklearn import metrics\n",
+ "import numpy as np "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def read_predictions():\n",
+ "    \"\"\"Read every model's prediction CSV into a dict of DataFrames.\n",
+ "\n",
+ "    The six baseline files are looked up under 'compare/onsplit_ML_test' and\n",
+ "    the P-net file under 'pnet/onsplit_average_reg_10_tanh_large_testing',\n",
+ "    both relative to PROSTATE_LOG_PATH.\n",
+ "\n",
+ "    Returns a dict mapping model name -> DataFrame (first CSV column used as\n",
+ "    the index). Baseline names are used verbatim to build the file names, so\n",
+ "    the trailing space in the two SVM names is significant.\n",
+ "    \"\"\"\n",
+ "    log_dir = PROSTATE_LOG_PATH\n",
+ "    baseline_dir = join(log_dir, 'compare/onsplit_ML_test')\n",
+ "    baseline_names = ['Linear Support Vector Machine ', 'RBF Support Vector Machine ',\n",
+ "                      'L2 Logistic Regression', 'Random Forest',\n",
+ "                      'Adaptive Boosting', 'Decision Tree']\n",
+ "\n",
+ "    predictions = {}\n",
+ "    for name in baseline_names:\n",
+ "        csv_path = join(baseline_dir, name + '_data_0_testing.csv')\n",
+ "        predictions[name] = pd.read_csv(csv_path, sep=',', index_col=0, header=0)\n",
+ "\n",
+ "    pnet_dir = join(log_dir, 'pnet/onsplit_average_reg_10_tanh_large_testing')\n",
+ "    pnet_csv = join(pnet_dir, 'P-net_ALL_testing.csv')\n",
+ "    predictions['P-net'] = pd.read_csv(pnet_csv, sep=',', index_col=0, header=0)\n",
+ "    return predictions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "all_models_dict = read_predictions()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pnet_predictions = all_models_dict['P-net']\n",
+ "labels = pnet_predictions['y'].values.ravel()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " pred | \n",
+ " pred_scores | \n",
+ " y | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 01-087MM_BONE | \n",
+ " 1.0 | \n",
+ " 0.946647 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 01-095N1_LN | \n",
+ " 0.0 | \n",
+ " 0.127979 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 08-093J1_LN | \n",
+ " 1.0 | \n",
+ " 0.990789 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10362 | \n",
+ " 1.0 | \n",
+ " 0.475796 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " AAPC-IP_LG-069-Tumor-SM-3NC72 | \n",
+ " 0.0 | \n",
+ " 0.114404 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " pred pred_scores y\n",
+ "01-087MM_BONE 1.0 0.946647 1\n",
+ "01-095N1_LN 0.0 0.127979 1\n",
+ "08-093J1_LN 1.0 0.990789 1\n",
+ "10362 1.0 0.475796 0\n",
+ "AAPC-IP_LG-069-Tumor-SM-3NC72 0.0 0.114404 0"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "all_models_dict['P-net'].head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " pred | \n",
+ " pred_scores | \n",
+ " y | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 01-087MM_BONE | \n",
+ " 1 | \n",
+ " 0.765949 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 01-095N1_LN | \n",
+ " 0 | \n",
+ " 0.242726 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 08-093J1_LN | \n",
+ " 1 | \n",
+ " 0.956951 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10362 | \n",
+ " 1 | \n",
+ " 0.809066 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " AAPC-IP_LG-069-Tumor-SM-3NC72 | \n",
+ " 0 | \n",
+ " 0.238246 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " pred pred_scores y\n",
+ "01-087MM_BONE 1 0.765949 1\n",
+ "01-095N1_LN 0 0.242726 1\n",
+ "08-093J1_LN 1 0.956951 1\n",
+ "10362 1 0.809066 0\n",
+ "AAPC-IP_LG-069-Tumor-SM-3NC72 0 0.238246 0"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "all_models_dict['Linear Support Vector Machine '].head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Metric name -> scikit-learn scoring callable, registered in a fixed order.\n",
+ "score_fun = {}\n",
+ "for metric_name, metric_callable in [\n",
+ "        ('Accuracy', metrics.accuracy_score),\n",
+ "        ('Precision', metrics.precision_score),\n",
+ "        ('AUC', metrics.roc_auc_score),\n",
+ "        ('F1', metrics.f1_score),\n",
+ "        ('AUPR', metrics.average_precision_score),\n",
+ "        ('Recall', metrics.recall_score),\n",
+ "]:\n",
+ "    score_fun[metric_name] = metric_callable\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def fdr(p_vals):\n",
+ "    \"\"\"Return Benjamini-Hochberg FDR-adjusted p-values.\n",
+ "\n",
+ "    Each raw p-value is scaled by n / rank; the scaled values are then made\n",
+ "    monotone (running minimum taken from the largest p-value downwards) and\n",
+ "    capped at 1. Without the running minimum a smaller raw p-value can end\n",
+ "    up with a larger adjusted value than a bigger one.\n",
+ "\n",
+ "    p_vals: 1-D collection of raw p-values (list, numpy array or pandas\n",
+ "        Series). The input is not modified.\n",
+ "    Returns: a pandas Series with the input's index and name when p_vals is\n",
+ "        a Series, otherwise a numpy array in the input order.\n",
+ "    \"\"\"\n",
+ "    # Supersedes the un-monotonised formula taken from\n",
+ "    # https://stackoverflow.com/questions/25185205/calculating-adjusted-p-values-in-python\n",
+ "    import numpy as np\n",
+ "    import pandas as pd\n",
+ "\n",
+ "    raw = np.asarray(p_vals, dtype=float)\n",
+ "    n_tests = raw.size\n",
+ "    # Visit the p-values from the largest (rank n) down to the smallest (rank 1).\n",
+ "    descending = np.argsort(raw)[::-1]\n",
+ "    scaled = raw[descending] * n_tests / np.arange(n_tests, 0, -1)\n",
+ "    monotone = np.minimum(np.minimum.accumulate(scaled), 1.0)\n",
+ "\n",
+ "    # Scatter the adjusted values back to the positions of the raw p-values.\n",
+ "    adjusted = np.empty(n_tests, dtype=float)\n",
+ "    adjusted[descending] = monotone\n",
+ "    if isinstance(p_vals, pd.Series):\n",
+ "        return pd.Series(adjusted, index=p_vals.index, name=p_vals.name)\n",
+ "    return adjusted"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Compare every baseline model against P-net on each metric.\n",
+ "#   results        - one dict per (model, measure) with the p-value returned by pvalue_stat\n",
+ "#   delong_results - one dict per model with the DeLong-based p-value for AUC\n",
+ "results = []\n",
+ "pvalue_list = []\n",
+ "delong_results = []\n",
+ "# Statistic used both inside pvalue_stat and to summarise the returned score arrays.\n",
+ "stat_fun = np.median\n",
+ "for model_name, predictions_df in all_models_dict.items():\n",
+ "    if model_name == 'P-net':\n",
+ "        # P-net is the reference model; it is not compared with itself.\n",
+ "        continue\n",
+ "    for func_name, func in score_fun.items():\n",
+ "        # AUC and AUPR are computed from 'pred_scores'; every other metric from 'pred'.\n",
+ "        col_name = 'pred_scores' if func_name in ['AUC', 'AUPR'] else 'pred'\n",
+ "        pred_pnet = pnet_predictions[col_name].values.ravel()\n",
+ "        pred_model = predictions_df[col_name].values.ravel()\n",
+ "\n",
+ "        if func_name == 'AUC':\n",
+ "            # NOTE(review): the returned value is treated as a log10 p-value and halved,\n",
+ "            # presumably to turn a two-sided p-value into a one-sided one - confirm\n",
+ "            # against utils.stats_utils_delong_xu.delong_roc_test.\n",
+ "            pvalue_ = delong_roc_test(labels, pred_pnet, pred_model)\n",
+ "            pvalue_delong = 10**pvalue_[0][0]/2\n",
+ "            delong_results.append({'measure': 'AUC_DeLong', 'model': model_name, 'pvalue': pvalue_delong})\n",
+ "\n",
+ "        p, scores1, scores2, z = pvalue_stat(labels, pred_pnet, pred_model, func, n_bootstraps=2000,\n",
+ "                                             two_tailed=False, seed=1234, stat_fun=stat_fun)\n",
+ "        med_pnet = stat_fun(scores1)\n",
+ "        med_model = stat_fun(scores2)\n",
+ "        stat_fun_diff = med_pnet - med_model\n",
+ "        results.append({'measure': func_name, 'model': model_name, 'pvalue': p, 'model median': med_model,\n",
+ "                        'P-NET median': med_pnet, 'Median difference': stat_fun_diff})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "results_df = pd.DataFrame(results)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Adjust the p-values separately within each measure. transform() returns a result\n",
+ "# indexed like results_df, so the column assignment aligns regardless of how\n",
+ "# groupby().apply() labels its output in the installed pandas version.\n",
+ "results_df['pvalue_fdr_adjusted'] = results_df.groupby('measure')['pvalue'].transform(fdr)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Index the table by (model, measure) and keep three decimals for reporting.\n",
+ "results_df = results_df.set_index(['model', 'measure']).round(3)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " | \n",
+ " Median difference | \n",
+ " P-NET median | \n",
+ " model median | \n",
+ " pvalue | \n",
+ " pvalue_fdr_adjusted | \n",
+ "
\n",
+ " \n",
+ " model | \n",
+ " measure | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " RBF Support Vector Machine | \n",
+ " AUPR | \n",
+ " 0.021 | \n",
+ " 0.881 | \n",
+ " 0.860 | \n",
+ " 0.205 | \n",
+ " 0.205 | \n",
+ "
\n",
+ " \n",
+ " F1 | \n",
+ " 0.054 | \n",
+ " 0.755 | \n",
+ " 0.702 | \n",
+ " 0.088 | \n",
+ " 0.177 | \n",
+ "
\n",
+ " \n",
+ " AUC | \n",
+ " 0.013 | \n",
+ " 0.928 | \n",
+ " 0.915 | \n",
+ " 0.212 | \n",
+ " 0.212 | \n",
+ "
\n",
+ " \n",
+ " Recall | \n",
+ " 0.151 | \n",
+ " 0.763 | \n",
+ " 0.612 | \n",
+ " 0.002 | \n",
+ " 0.003 | \n",
+ "
\n",
+ " \n",
+ " Precision | \n",
+ " -0.074 | \n",
+ " 0.750 | \n",
+ " 0.824 | \n",
+ " 0.927 | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " Accuracy | \n",
+ " 0.010 | \n",
+ " 0.838 | \n",
+ " 0.828 | \n",
+ " 0.372 | \n",
+ " 0.745 | \n",
+ "
\n",
+ " \n",
+ " Decision Tree | \n",
+ " AUPR | \n",
+ " 0.140 | \n",
+ " 0.881 | \n",
+ " 0.741 | \n",
+ " 0.001 | \n",
+ " 0.004 | \n",
+ "
\n",
+ " \n",
+ " F1 | \n",
+ " 0.035 | \n",
+ " 0.755 | \n",
+ " 0.720 | \n",
+ " 0.207 | \n",
+ " 0.248 | \n",
+ "
\n",
+ " \n",
+ " AUC | \n",
+ " 0.072 | \n",
+ " 0.928 | \n",
+ " 0.856 | \n",
+ " 0.000 | \n",
+ " 0.000 | \n",
+ "
\n",
+ " \n",
+ " Recall | \n",
+ " 0.147 | \n",
+ " 0.763 | \n",
+ " 0.615 | \n",
+ " 0.001 | \n",
+ " 0.003 | \n",
+ "
\n",
+ " \n",
+ " Precision | \n",
+ " -0.122 | \n",
+ " 0.750 | \n",
+ " 0.872 | \n",
+ " 0.978 | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " Accuracy | \n",
+ " -0.005 | \n",
+ " 0.838 | \n",
+ " 0.843 | \n",
+ " 0.602 | \n",
+ " 0.722 | \n",
+ "
\n",
+ " \n",
+ " L2 Logistic Regression | \n",
+ " AUPR | \n",
+ " 0.068 | \n",
+ " 0.881 | \n",
+ " 0.813 | \n",
+ " 0.001 | \n",
+ " 0.004 | \n",
+ "
\n",
+ " \n",
+ " F1 | \n",
+ " 0.028 | \n",
+ " 0.755 | \n",
+ " 0.727 | \n",
+ " 0.232 | \n",
+ " 0.232 | \n",
+ "
\n",
+ " \n",
+ " AUC | \n",
+ " 0.045 | \n",
+ " 0.928 | \n",
+ " 0.883 | \n",
+ " 0.006 | \n",
+ " 0.018 | \n",
+ "
\n",
+ " \n",
+ " Recall | \n",
+ " 0.091 | \n",
+ " 0.763 | \n",
+ " 0.672 | \n",
+ " 0.066 | \n",
+ " 0.066 | \n",
+ "
\n",
+ " \n",
+ " Precision | \n",
+ " -0.042 | \n",
+ " 0.750 | \n",
+ " 0.792 | \n",
+ " 0.826 | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " Accuracy | \n",
+ " 0.005 | \n",
+ " 0.838 | \n",
+ " 0.833 | \n",
+ " 0.452 | \n",
+ " 0.679 | \n",
+ "
\n",
+ " \n",
+ " Adaptive Boosting | \n",
+ " AUPR | \n",
+ " 0.050 | \n",
+ " 0.881 | \n",
+ " 0.831 | \n",
+ " 0.054 | \n",
+ " 0.080 | \n",
+ "
\n",
+ " \n",
+ " F1 | \n",
+ " 0.050 | \n",
+ " 0.755 | \n",
+ " 0.705 | \n",
+ " 0.126 | \n",
+ " 0.188 | \n",
+ "
\n",
+ " \n",
+ " AUC | \n",
+ " 0.039 | \n",
+ " 0.928 | \n",
+ " 0.889 | \n",
+ " 0.019 | \n",
+ " 0.038 | \n",
+ "
\n",
+ " \n",
+ " Recall | \n",
+ " 0.195 | \n",
+ " 0.763 | \n",
+ " 0.568 | \n",
+ " 0.000 | \n",
+ " 0.000 | \n",
+ "
\n",
+ " \n",
+ " Precision | \n",
+ " -0.180 | \n",
+ " 0.750 | \n",
+ " 0.930 | \n",
+ " 0.998 | \n",
+ " 0.998 | \n",
+ "
\n",
+ " \n",
+ " Accuracy | \n",
+ " -0.005 | \n",
+ " 0.838 | \n",
+ " 0.843 | \n",
+ " 0.609 | \n",
+ " 0.609 | \n",
+ "
\n",
+ " \n",
+ " Linear Support Vector Machine | \n",
+ " AUPR | \n",
+ " 0.024 | \n",
+ " 0.881 | \n",
+ " 0.857 | \n",
+ " 0.187 | \n",
+ " 0.224 | \n",
+ "
\n",
+ " \n",
+ " F1 | \n",
+ " 0.060 | \n",
+ " 0.755 | \n",
+ " 0.695 | \n",
+ " 0.066 | \n",
+ " 0.399 | \n",
+ "
\n",
+ " \n",
+ " AUC | \n",
+ " 0.021 | \n",
+ " 0.928 | \n",
+ " 0.907 | \n",
+ " 0.126 | \n",
+ " 0.151 | \n",
+ "
\n",
+ " \n",
+ " Recall | \n",
+ " 0.151 | \n",
+ " 0.763 | \n",
+ " 0.612 | \n",
+ " 0.002 | \n",
+ " 0.003 | \n",
+ "
\n",
+ " \n",
+ " Precision | \n",
+ " -0.057 | \n",
+ " 0.750 | \n",
+ " 0.807 | \n",
+ " 0.860 | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " Accuracy | \n",
+ " 0.015 | \n",
+ " 0.838 | \n",
+ " 0.824 | \n",
+ " 0.298 | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " Random Forest | \n",
+ " AUPR | \n",
+ " 0.058 | \n",
+ " 0.881 | \n",
+ " 0.823 | \n",
+ " 0.022 | \n",
+ " 0.043 | \n",
+ "
\n",
+ " \n",
+ " F1 | \n",
+ " 0.064 | \n",
+ " 0.755 | \n",
+ " 0.691 | \n",
+ " 0.075 | \n",
+ " 0.225 | \n",
+ "
\n",
+ " \n",
+ " AUC | \n",
+ " 0.033 | \n",
+ " 0.928 | \n",
+ " 0.895 | \n",
+ " 0.049 | \n",
+ " 0.073 | \n",
+ "
\n",
+ " \n",
+ " Recall | \n",
+ " 0.155 | \n",
+ " 0.763 | \n",
+ " 0.608 | \n",
+ " 0.004 | \n",
+ " 0.004 | \n",
+ "
\n",
+ " \n",
+ " Precision | \n",
+ " -0.054 | \n",
+ " 0.750 | \n",
+ " 0.804 | \n",
+ " 0.850 | \n",
+ " 1.000 | \n",
+ "
\n",
+ " \n",
+ " Accuracy | \n",
+ " 0.015 | \n",
+ " 0.838 | \n",
+ " 0.824 | \n",
+ " 0.310 | \n",
+ " 0.928 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Median difference P-NET median \\\n",
+ "model measure \n",
+ "RBF Support Vector Machine AUPR 0.021 0.881 \n",
+ " F1 0.054 0.755 \n",
+ " AUC 0.013 0.928 \n",
+ " Recall 0.151 0.763 \n",
+ " Precision -0.074 0.750 \n",
+ " Accuracy 0.010 0.838 \n",
+ "Decision Tree AUPR 0.140 0.881 \n",
+ " F1 0.035 0.755 \n",
+ " AUC 0.072 0.928 \n",
+ " Recall 0.147 0.763 \n",
+ " Precision -0.122 0.750 \n",
+ " Accuracy -0.005 0.838 \n",
+ "L2 Logistic Regression AUPR 0.068 0.881 \n",
+ " F1 0.028 0.755 \n",
+ " AUC 0.045 0.928 \n",
+ " Recall 0.091 0.763 \n",
+ " Precision -0.042 0.750 \n",
+ " Accuracy 0.005 0.838 \n",
+ "Adaptive Boosting AUPR 0.050 0.881 \n",
+ " F1 0.050 0.755 \n",
+ " AUC 0.039 0.928 \n",
+ " Recall 0.195 0.763 \n",
+ " Precision -0.180 0.750 \n",
+ " Accuracy -0.005 0.838 \n",
+ "Linear Support Vector Machine AUPR 0.024 0.881 \n",
+ " F1 0.060 0.755 \n",
+ " AUC 0.021 0.928 \n",
+ " Recall 0.151 0.763 \n",
+ " Precision -0.057 0.750 \n",
+ " Accuracy 0.015 0.838 \n",
+ "Random Forest AUPR 0.058 0.881 \n",
+ " F1 0.064 0.755 \n",
+ " AUC 0.033 0.928 \n",
+ " Recall 0.155 0.763 \n",
+ " Precision -0.054 0.750 \n",
+ " Accuracy 0.015 0.838 \n",
+ "\n",
+ " model median pvalue \\\n",
+ "model measure \n",
+ "RBF Support Vector Machine AUPR 0.860 0.205 \n",
+ " F1 0.702 0.088 \n",
+ " AUC 0.915 0.212 \n",
+ " Recall 0.612 0.002 \n",
+ " Precision 0.824 0.927 \n",
+ " Accuracy 0.828 0.372 \n",
+ "Decision Tree AUPR 0.741 0.001 \n",
+ " F1 0.720 0.207 \n",
+ " AUC 0.856 0.000 \n",
+ " Recall 0.615 0.001 \n",
+ " Precision 0.872 0.978 \n",
+ " Accuracy 0.843 0.602 \n",
+ "L2 Logistic Regression AUPR 0.813 0.001 \n",
+ " F1 0.727 0.232 \n",
+ " AUC 0.883 0.006 \n",
+ " Recall 0.672 0.066 \n",
+ " Precision 0.792 0.826 \n",
+ " Accuracy 0.833 0.452 \n",
+ "Adaptive Boosting AUPR 0.831 0.054 \n",
+ " F1 0.705 0.126 \n",
+ " AUC 0.889 0.019 \n",
+ " Recall 0.568 0.000 \n",
+ " Precision 0.930 0.998 \n",
+ " Accuracy 0.843 0.609 \n",
+ "Linear Support Vector Machine AUPR 0.857 0.187 \n",
+ " F1 0.695 0.066 \n",
+ " AUC 0.907 0.126 \n",
+ " Recall 0.612 0.002 \n",
+ " Precision 0.807 0.860 \n",
+ " Accuracy 0.824 0.298 \n",
+ "Random Forest AUPR 0.823 0.022 \n",
+ " F1 0.691 0.075 \n",
+ " AUC 0.895 0.049 \n",
+ " Recall 0.608 0.004 \n",
+ " Precision 0.804 0.850 \n",
+ " Accuracy 0.824 0.310 \n",
+ "\n",
+ " pvalue_fdr_adjusted \n",
+ "model measure \n",
+ "RBF Support Vector Machine AUPR 0.205 \n",
+ " F1 0.177 \n",
+ " AUC 0.212 \n",
+ " Recall 0.003 \n",
+ " Precision 1.000 \n",
+ " Accuracy 0.745 \n",
+ "Decision Tree AUPR 0.004 \n",
+ " F1 0.248 \n",
+ " AUC 0.000 \n",
+ " Recall 0.003 \n",
+ " Precision 1.000 \n",
+ " Accuracy 0.722 \n",
+ "L2 Logistic Regression AUPR 0.004 \n",
+ " F1 0.232 \n",
+ " AUC 0.018 \n",
+ " Recall 0.066 \n",
+ " Precision 1.000 \n",
+ " Accuracy 0.679 \n",
+ "Adaptive Boosting AUPR 0.080 \n",
+ " F1 0.188 \n",
+ " AUC 0.038 \n",
+ " Recall 0.000 \n",
+ " Precision 0.998 \n",
+ " Accuracy 0.609 \n",
+ "Linear Support Vector Machine AUPR 0.224 \n",
+ " F1 0.399 \n",
+ " AUC 0.151 \n",
+ " Recall 0.003 \n",
+ " Precision 1.000 \n",
+ " Accuracy 1.000 \n",
+ "Random Forest AUPR 0.043 \n",
+ " F1 0.225 \n",
+ " AUC 0.073 \n",
+ " Recall 0.004 \n",
+ " Precision 1.000 \n",
+ " Accuracy 0.928 "
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "results_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "results_df.to_csv('model_comparison_pvalue.csv')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " | \n",
+ " pvalue | \n",
+ " pvalue_fdr_adjusted | \n",
+ "
\n",
+ " \n",
+ " model | \n",
+ " measure | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " RBF Support Vector Machine | \n",
+ " AUC_DeLong | \n",
+ " 0.210 | \n",
+ " 0.210 | \n",
+ "
\n",
+ " \n",
+ " Decision Tree | \n",
+ " AUC_DeLong | \n",
+ " 0.001 | \n",
+ " 0.006 | \n",
+ "
\n",
+ " \n",
+ " L2 Logistic Regression | \n",
+ " AUC_DeLong | \n",
+ " 0.007 | \n",
+ " 0.021 | \n",
+ "
\n",
+ " \n",
+ " Adaptive Boosting | \n",
+ " AUC_DeLong | \n",
+ " 0.023 | \n",
+ " 0.046 | \n",
+ "
\n",
+ " \n",
+ " Linear Support Vector Machine | \n",
+ " AUC_DeLong | \n",
+ " 0.117 | \n",
+ " 0.140 | \n",
+ "
\n",
+ " \n",
+ " Random Forest | \n",
+ " AUC_DeLong | \n",
+ " 0.051 | \n",
+ " 0.076 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " pvalue pvalue_fdr_adjusted\n",
+ "model measure \n",
+ "RBF Support Vector Machine AUC_DeLong 0.210 0.210\n",
+ "Decision Tree AUC_DeLong 0.001 0.006\n",
+ "L2 Logistic Regression AUC_DeLong 0.007 0.021\n",
+ "Adaptive Boosting AUC_DeLong 0.023 0.046\n",
+ "Linear Support Vector Machine AUC_DeLong 0.117 0.140\n",
+ "Random Forest AUC_DeLong 0.051 0.076"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Table of the DeLong-based AUC p-values collected in delong_results, with an\n",
+ "# FDR-adjusted column, written to model_comparison_delong_pvalue.csv.\n",
+ "results_delong_df = pd.DataFrame(delong_results)\n",
+ "# transform() keeps the original row index, so the assignment aligns regardless of\n",
+ "# how groupby().apply() labels its output in the installed pandas version.\n",
+ "results_delong_df['pvalue_fdr_adjusted'] = results_delong_df.groupby('measure')['pvalue'].transform(fdr)\n",
+ "results_delong_df = results_delong_df.set_index(['model', 'measure'])\n",
+ "results_delong_df = results_delong_df.round(3)\n",
+ "results_delong_df.to_csv('model_comparison_delong_pvalue.csv')\n",
+ "results_delong_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " measure | \n",
+ " AUC | \n",
+ " AUPR | \n",
+ " Accuracy | \n",
+ " F1 | \n",
+ " Precision | \n",
+ " Recall | \n",
+ "
\n",
+ " \n",
+ " model | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Adaptive Boosting | \n",
+ " 0.0190 | \n",
+ " 0.0535 | \n",
+ " 0.6090 | \n",
+ " 0.1255 | \n",
+ " 0.9975 | \n",
+ " 0.0000 | \n",
+ "
\n",
+ " \n",
+ " Decision Tree | \n",
+ " 0.0000 | \n",
+ " 0.0010 | \n",
+ " 0.6015 | \n",
+ " 0.2070 | \n",
+ " 0.9780 | \n",
+ " 0.0010 | \n",
+ "
\n",
+ " \n",
+ " L2 Logistic Regression | \n",
+ " 0.0060 | \n",
+ " 0.0010 | \n",
+ " 0.4525 | \n",
+ " 0.2325 | \n",
+ " 0.8255 | \n",
+ " 0.0660 | \n",
+ "
\n",
+ " \n",
+ " Linear Support Vector Machine | \n",
+ " 0.1255 | \n",
+ " 0.1870 | \n",
+ " 0.2985 | \n",
+ " 0.0665 | \n",
+ " 0.8600 | \n",
+ " 0.0020 | \n",
+ "
\n",
+ " \n",
+ " RBF Support Vector Machine | \n",
+ " 0.2115 | \n",
+ " 0.2050 | \n",
+ " 0.3725 | \n",
+ " 0.0885 | \n",
+ " 0.9270 | \n",
+ " 0.0015 | \n",
+ "
\n",
+ " \n",
+ " Random Forest | \n",
+ " 0.0485 | \n",
+ " 0.0215 | \n",
+ " 0.3095 | \n",
+ " 0.0750 | \n",
+ " 0.8495 | \n",
+ " 0.0035 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "measure AUC AUPR Accuracy F1 Precision \\\n",
+ "model \n",
+ "Adaptive Boosting 0.0190 0.0535 0.6090 0.1255 0.9975 \n",
+ "Decision Tree 0.0000 0.0010 0.6015 0.2070 0.9780 \n",
+ "L2 Logistic Regression 0.0060 0.0010 0.4525 0.2325 0.8255 \n",
+ "Linear Support Vector Machine 0.1255 0.1870 0.2985 0.0665 0.8600 \n",
+ "RBF Support Vector Machine 0.2115 0.2050 0.3725 0.0885 0.9270 \n",
+ "Random Forest 0.0485 0.0215 0.3095 0.0750 0.8495 \n",
+ "\n",
+ "measure Recall \n",
+ "model \n",
+ "Adaptive Boosting 0.0000 \n",
+ "Decision Tree 0.0010 \n",
+ "L2 Logistic Regression 0.0660 \n",
+ "Linear Support Vector Machine 0.0020 \n",
+ "RBF Support Vector Machine 0.0015 \n",
+ "Random Forest 0.0035 "
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "\n",
+ "# Pivot the unadjusted 'pvalue' entries: one row per model, one column per measure.\n",
+ "df = pd.DataFrame(results)\n",
+ "df_cross = pd.crosstab(index=df['model'], columns=df['measure'],\n",
+ "                       values=df['pvalue'], aggfunc='first')\n",
+ "\n",
+ "df_cross"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Work on a copy so df_cross keeps the raw p-values; adjust one measure (column) at a time.\n",
+ "df_cross_fdr_adjusted = df_cross.copy()\n",
+ "for measure_name in df_cross_fdr_adjusted.columns:\n",
+ "    raw_column = df_cross_fdr_adjusted[measure_name]\n",
+ "    df_cross_fdr_adjusted[measure_name] = fdr(raw_column)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " measure | \n",
+ " AUC | \n",
+ " AUPR | \n",
+ " Accuracy | \n",
+ " F1 | \n",
+ " Precision | \n",
+ " Recall | \n",
+ "
\n",
+ " \n",
+ " model | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Adaptive Boosting | \n",
+ " 0.038 | \n",
+ " 0.080 | \n",
+ " 0.609 | \n",
+ " 0.188 | \n",
+ " 0.998 | \n",
+ " 0.000 | \n",
+ "
\n",
+ " \n",
+ " Decision Tree | \n",
+ " 0.000 | \n",
+ " 0.004 | \n",
+ " 0.722 | \n",
+ " 0.248 | \n",
+ " 1.000 | \n",
+ " 0.003 | \n",
+ "
\n",
+ " \n",
+ " L2 Logistic Regression | \n",
+ " 0.018 | \n",
+ " 0.004 | \n",
+ " 0.679 | \n",
+ " 0.232 | \n",
+ " 1.000 | \n",
+ " 0.066 | \n",
+ "
\n",
+ " \n",
+ " Linear Support Vector Machine | \n",
+ " 0.151 | \n",
+ " 0.224 | \n",
+ " 1.000 | \n",
+ " 0.399 | \n",
+ " 1.000 | \n",
+ " 0.003 | \n",
+ "
\n",
+ " \n",
+ " RBF Support Vector Machine | \n",
+ " 0.212 | \n",
+ " 0.205 | \n",
+ " 0.745 | \n",
+ " 0.177 | \n",
+ " 1.000 | \n",
+ " 0.003 | \n",
+ "
\n",
+ " \n",
+ " Random Forest | \n",
+ " 0.073 | \n",
+ " 0.043 | \n",
+ " 0.928 | \n",
+ " 0.225 | \n",
+ " 1.000 | \n",
+ " 0.004 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "measure AUC AUPR Accuracy F1 Precision \\\n",
+ "model \n",
+ "Adaptive Boosting 0.038 0.080 0.609 0.188 0.998 \n",
+ "Decision Tree 0.000 0.004 0.722 0.248 1.000 \n",
+ "L2 Logistic Regression 0.018 0.004 0.679 0.232 1.000 \n",
+ "Linear Support Vector Machine 0.151 0.224 1.000 0.399 1.000 \n",
+ "RBF Support Vector Machine 0.212 0.205 0.745 0.177 1.000 \n",
+ "Random Forest 0.073 0.043 0.928 0.225 1.000 \n",
+ "\n",
+ "measure Recall \n",
+ "model \n",
+ "Adaptive Boosting 0.000 \n",
+ "Decision Tree 0.003 \n",
+ "L2 Logistic Regression 0.066 \n",
+ "Linear Support Vector Machine 0.003 \n",
+ "RBF Support Vector Machine 0.003 \n",
+ "Random Forest 0.004 "
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# df_cross_fdr_adjusted.applymap('{:.3f}'.format)\n",
+ "df_cross_fdr_adjusted = df_cross_fdr_adjusted.round(3)\n",
+ "df_cross_fdr_adjusted"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_cross_fdr_adjusted.to_csv('df_cross_fdr_adjusted.csv')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python [conda env:min_env]",
+ "language": "python",
+ "name": "conda-env-min_env-py"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 2
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython2",
+ "version": "2.7.15"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}