diff --git a/Visulization/Statistical Test.ipynb b/Visulization/Statistical Test.ipynb new file mode 100644 index 0000000..48b139d --- /dev/null +++ b/Visulization/Statistical Test.ipynb @@ -0,0 +1,296 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from statsmodels.stats.proportion import proportions_ztest\n", + "pd.set_option('mode.chained_assignment', None)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# data import\n", + "train = pd.read_csv('Training_Dataset.csv')\n", + "train_labels = pd.read_csv(\"Labels_Train.csv\")\n", + "test = pd.read_csv(\"Repackaged_Benign_Testset.csv\")\n", + "test_labels = pd.read_csv(\"Labels_Repackaged_Benign_Test.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "train['label'] = train_labels['label']\n", + "test['label'] = test_labels['label']\n", + "data = pd.concat([train, test])" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of benign apps: 4862\n", + "Number of malicious apps: 9142\n" + ] + } + ], + "source": [ + "print(\"Number of benign apps:\", data[data['label']==0].shape[0])\n", + "print(\"Number of malicious apps:\", data[data['label']==1].shape[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "sensor_types = data.iloc[:, -30:-1].columns" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# get number of sensors used\n", + "data['num_sensors'] = data[sensor_types].sum(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + 
"data": { + "text/plain": [ + "count 14004.000000\n", + "mean 0.384462\n", + "std 1.569956\n", + "min 0.000000\n", + "25% 0.000000\n", + "50% 0.000000\n", + "75% 0.000000\n", + "max 29.000000\n", + "Name: num_sensors, dtype: float64" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data['num_sensors'].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Distribution of number of sensors used (Malicious apps)')" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.hist(data[data['label']==1]['num_sensors'])\n", + "plt.title(\"Distribution of number of sensors used (Malicious apps)\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Distribution of number of sensors used (Benign apps)')" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEICAYAAAC3Y/QeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy86wFpkAAAACXBIWXMAAAsTAAALEwEAmpwYAAAZ6UlEQVR4nO3ce5hdVX3/8feHJEAIBBITMCSBAYyVkFqUiGnRmgItKWpDL2ioQrD0F7lYodWWQPURrCloq/LQn1BRfEhESVOhElF+mkYi+isFEorGEJAUYhIScuFiEopIwrd/rDVk5+ScmTOXzGTO+rye5zxz9tp7r73Wvnxmn3X2jCICMzMry3793QAzM+t7Dn8zswI5/M3MCuTwNzMrkMPfzKxADn8zswK1fPhL+mdJH++luo6StF3SoDy9RNKf90bdub67Jc3srfq6sN1PSdoi6em+3nZNO6ZKWteP2/9DSWvzMX5Tf7WjFUhqkxSSBnewzDWSLuuj9myXdGxfbGtvk/QHkub3tJ4BHf6SVkt6UdI2Sc9L+g9JF0p6tV8RcWFE/F2TdZ3e0TIRsSYiDo6Inb3Q9qsk3VpT/+9HxNye1t3FdowHPgJMjIjX9uW290H/CHwoH+P/6u/GtDJJo4HzgC/m6amSXskhvV3SU5Ku7q3t5WP6RG/V158iYiEwSdIbe1LPgA7/7N0RcQhwNHAtcDlwc29vpKM7mAHuaOCZiNjU3w3pTd08XkcDK3q7LXvbAD03zwe+ExEvVsrW55A+GHgbcIGks/qjcQPAbcCsHtUQEQP2BawGTq8pOxl4BZiUp28BPpXfjwLuAp4HngV+SPoF+NW8zovAduBvgDYggAuANcC9lbLBub4lwDXAA8AvgDuBkXneVGBdvfYC04BfAS/n7f24Ut+f5/f7AR8Dfg5sAuYBh+Z57e2Ymdu2BfjbDvbToXn9zbm+j+X6T899fiW345Y6604F1pE+HWwCNgAfqMx/tc15+nzgR5XpAC4GHge2AX8HHAfcB2wFFgD712zrytyn1cD7KnUdQLo7XwNsBP4ZGFqz7uXA08BX6/Sl7j7N9W7PbX0B+O866wr4fF7vF8BP2HWONdOuRvvvTOCRvG+eAj5amfd/gFWkc3UhcGTNfr0k79cnO2pfZ9cNcBVwa35/IHAr8AzpOnkQOKJyHt2c+/AU8ClgUJ43KO+DLcATuW2vXit12vB94P2151nNMguAKyvTbwAW5f3xGPCeyrxbgC8A38778n7guJr99br8/jXAt0jn34O5H7Xn7IV53z6X61WDfpxMOpefz/vl/5LP50pdH877ZAvwD8B+lWvl/wP/lI/Zo8BpNdfSE7k/T7L7tXAK8GSP8nNvhHJfvWpP4kr5GuCiyknRHv7XkC7MIfn19vaDWueCaMsHbh4wDBhK/fB/CpiUl7mdX
RdRvZP51W1QueAq85ewK/z/jHThHwscDNxBDrRKO76U2/UbwEvA8Q320zzSL6ZD8ro/Ay5o1M6adacCO4BP5n12JvA/wIjaNldO2NoLaSEwHDght3Nx7tehpOCbWbOtz5EC9R2kMP61PP+6XNfI3JdvAdfUrPvpvO7QOn1puE9rA6LOumcAy4DDSEF7PDCmC+1qtP82AG/P70cAb87vTyWFxZtzf/4JuLemrYvyNod21L7Orht2D/8P5vYfRAr0k4Dhed43ScM0w4DDSTc9H8zzLiSF1/jcpnvoOPw3A2+pOc/WVaYnkK6tU/P0MGAt8AFgcN4vW4ATKtf5s6QwHgx8DZhf79gC8/PrIGBirrf2nL0r78ujclunNejHScCUvM02YCVwWU1d9+R9chTp2mu/xs/P58Zf5nPjvaRfAiNzf7ey69wf097XPD0y1z282/nZW0HcH6/ak7hS/p/kO2F2D/9PkkJwjwu8zgXRlnfusXXKquF/bWX+RNId/aDak7l2G3Qe/ouBiyvzfo30SaH9JAtgXGX+A8CMOv0aRArciZWyDwJL6l10ddafSvp0MLhStgmYUtvmygldeyGdUpleBlxemf4scF1lWzuAYZX5C4CPkwLtBXa/m/tN8t1PXvdXwIEd9KXhPq20tVH4n0q6cKeQ79xyeTPt6mj/rcnHY3jN9m4GPlOZPji3ta3S1lM7a18z1w27h/+fAf8BvLFmnSPyeTS0UnYOcE9+/33gwsq836Pj8H8ZeEPNefYK6Q56a173DnZ9Knwv8MOaOr4IfKJynX+5Mu9M4NGa8/B1pOvhZXKo5nn17vzfVnMOzu5on1aWvQz4t5q6plWmLwYWV66V9VQ+VZCu43NJ4f888MfUv5EZkus+qpl21Xu1wph/PWNJdwG1/oF05/c9SU9Imt1EXWu7MP/npIMyqqlWduzIXF+17sGki7Bd9emc/yEFRK1RwP516hrbhbY8ExE7mthWIxsr71+sM12t67mIeKEy/XPSvhhNulNblr/cfx74f7m83eaI+GUH7Whmn9YVEd8nfaT/ArBR0k2ShjfZro723x+Tgurnkn4g6TfrtTUitpOGYqrHbW1lfqP2ddVXge8C8yWtl/QZSUNI34cMATZU+vlF0ieA9vbWXgsdeY70KalqfUQcFhHDSXfdLwLtD0AcDby1fdt5++8Dqg8pNHM9jCYd82pb613jzdSFpNdLukvS05K2An/Pntd/7X45sjL9VOQ0r87P18B7SZ+oNkj6tqQ3VJZr33fP12tXM1ou/CW9hXSB/Kh2XkRsi4iPRMSxwLuBv5J0WvvsBlU2Km83vvL+KNJdxRbS3eBBlXYNYvdA6Kze9aQTvlr3DnYPzmZsyW2qreupLtbTyG79ZPeLsTtGSBpWmT6KtC+2kMLghBwQh0XEoZG+HGy3V/dpRFwfESeRhq9eD/x1k+3qqM4HI2I6KUS/SbrL3KOteZ+8ht2P2279bdC+ehoes4h4OSKujoiJwG8B7yI9lbOWdOc/qtLP4RFxQl51A3teCx35SW5jXRHxC+DrpOuUvP0fVLZ9WKQvhy/qZDu1NpOO+bhK2fgGyzbjRtJw14T8S+tK0qfBqtr9sr4yPVaS6s2PiO9GxO+ShnweJQ3ztjseWB0RW7vb8JYJf0nDJb2LNJZ3a0Qsr7PMuyS9Lu/srcDO/IIUAN15Dvj9kiZKOog0rPSNSI+C/gw4UNI7853Tx0hjt+02Am3Vx1Jr3Ab8paRjJB1MuqP4l5o7yE7ltiwA5kg6RNLRwF+RvtTrDQ8DfyTpIEmvI31B3lNXS9pf0ttJ4fOvEfEK6eT/vKTDASSNlXRGF+rt9j6V9BZJb83H8gXgl8DOnrQr9/F9kg6NiJfZdU5CCr4PSDpR0gG5rfdHxOqutK/Bph8GZkgaImky8CeVen5H0q/nm5WtpBuHnRGxAfge8Nl8re0n6ThJ78irLgA+LGmcpBFAZ5+qv0P6TqfRvjkYmMGup6/uA
l4v6dzc7iG5z8d3sp3d5OvhDuCqfM6+gfTLrbsOIe2n7bmuer+M/lrSiPxY9aXAv1TmHU7ab0MknU0K9e9IOkLpef5hpF+629n9eL4DuLsH7W6J8P+WpG2kO4O/JX1Z+IEGy04A/p20I+8DboiIJXneNcDH8kfKj3Zh+18ljTc+TXpS4sPw6p3LxcCXSXdrL5Ce+mj3r/nnM5IeqlPvV3Ld95K+6f8l8BddaFfVX+TtP0H6RPT1XH9v+DxprH0j6SP613pY39OkIYH1ua4LI+LRPO9y0rDdf+aP2P9OGrdvVk/26XBSyD9H+mj+DOnplp6261xgdV7vQuD9ABGxmPRdx+2ku+rjSGHYnfbV+niu7zngatL50O61wDdIgbYS+AG7bhTOIw0hPpLX/QbprpS87e8CPwYeIgVsR+YBZ0oaWik7sv05/9yHkaShHSJiG+l7hBmkc+Npdn2531UfIj1s8DTpfLiNFLDd8VHgT0lP5HyJ3YO93Z2k77oeJj2NVH0U/X5SLm0B5gB/EhHPkLL5I6S+PksK+4sr651D/huJ7mp/0sXMrE9J+ntgU0Rc18/t+DTw2oiYuRfqDtKQ0Ko6884nPSzxti7W+W7g3Ih4T0/aNhD/OMTMWkBEXNkf283DM/sDy4G3kIYqe+3ftOxtEfEt0uO4PeLwN7PSHEIa6jmS9NjtZ0lDM0XxsI+ZWYFa4QtfMzPron1+2GfUqFHR1tbW380wMxtQli1btiUiRjeav8+Hf1tbG0uXLu3vZpiZDSiSOvwraw/7mJkVyOFvZlYgh7+ZWYEc/mZmBXL4m5kVyOFvZlYgh7+ZWYEc/mZmBXL4m5kVaJ//C9+eaJv97X7Z7upr39kv2zUza5bv/M3MCuTwNzMrkMPfzKxADn8zswI5/M3MCuTwNzMrkMPfzKxADn8zswI5/M3MCuTwNzMrkMPfzKxADn8zswI5/M3MCuTwNzMrkMPfzKxADn8zswI5/M3MCuTwNzMrkMPfzKxADn8zswI5/M3MCuTwNzMrkMPfzKxADn8zswI5/M3MCuTwNzMrkMPfzKxATYe/pEGS/kvSXXl6pKRFkh7PP0dUlr1C0ipJj0k6o1J+kqTled71ktS73TEzs2Z05c7/UmBlZXo2sDgiJgCL8zSSJgIzgBOAacANkgbldW4EZgET8mtaj1pvZmbd0lT4SxoHvBP4cqV4OjA3v58LnFUpnx8RL0XEk8Aq4GRJY4DhEXFfRAQwr7KOmZn1oWbv/K8D/gZ4pVJ2RERsAMg/D8/lY4G1leXW5bKx+X1t+R4kzZK0VNLSzZs3N9lEMzNrVqfhL+ldwKaIWNZknfXG8aOD8j0LI26KiMkRMXn06NFNbtbMzJo1uIllTgH+QNKZwIHAcEm3AhsljYmIDXlIZ1Nefh0wvrL+OGB9Lh9Xp9zMzPpYp3f+EXFFRIyLiDbSF7nfj4j3AwuBmXmxmcCd+f1CYIakAyQdQ/pi94E8NLRN0pT8lM95lXXMzKwPNXPn38i1wAJJFwBrgLMBImKFpAXAI8AO4JKI2JnXuQi4BRgK3J1fZmbWx7oU/hGxBFiS3z8DnNZguTnAnDrlS4FJXW2kmZn1Lv+Fr5lZgRz+ZmYFcvibmRXI4W9mViCHv5lZgRz+ZmYFcvibmRXI4W9mViCHv5lZgRz+ZmYFcvibmRXI4W9mViCHv5lZgRz+ZmYFcvibmRXI4W9mViCHv5lZgRz+ZmYFcvibmRXI4W9mViCHv5lZgRz+ZmYFcvibmRXI4W9mViCHv5lZgRz+ZmYFcvibmRXI4W9mViCHv5lZgRz+ZmYFcvibmRXI4W9mViCHv5lZgRz+ZmYFcvibmRWo0/CXdKCkByT9WNIKSVfn8pGSFkl6PP8cUVnnCkmrJD0m6YxK+UmSlud510vS3umWmZl1pJk7/5eAUyPiN4ATgWmSpgCzgcURMQFYnKeRNBGYAZwATANukDQo13UjMAuYkF/Teq8rZmbWrE7DP5Lte
XJIfgUwHZiby+cCZ+X304H5EfFSRDwJrAJOljQGGB4R90VEAPMq65iZWR9qasxf0iBJDwObgEURcT9wRERsAMg/D8+LjwXWVlZfl8vG5ve15fW2N0vSUklLN2/e3IXumJlZM5oK/4jYGREnAuNId/GTOli83jh+dFBeb3s3RcTkiJg8evToZppoZmZd0KWnfSLieWAJaax+Yx7KIf/clBdbB4yvrDYOWJ/Lx9UpNzOzPtbM0z6jJR2W3w8FTgceBRYCM/NiM4E78/uFwAxJB0g6hvTF7gN5aGibpCn5KZ/zKuuYmVkfGtzEMmOAufmJnf2ABRFxl6T7gAWSLgDWAGcDRMQKSQuAR4AdwCURsTPXdRFwCzAUuDu/zMysj3Ua/hHxE+BNdcqfAU5rsM4cYE6d8qVAR98XmJlZH/Bf+JqZFcjhb2ZWIIe/mVmBHP5mZgVy+JuZFcjhb2ZWIIe/mVmBHP5mZgVy+JuZFcjhb2ZWIIe/mVmBHP5mZgVy+JuZFcjhb2ZWIIe/mVmBHP5mZgVy+JuZFcjhb2ZWIIe/mVmBHP5mZgVy+JuZFcjhb2ZWIIe/mVmBHP5mZgVy+JuZFcjhb2ZWIIe/mVmBHP5mZgVy+JuZFcjhb2ZWIIe/mVmBHP5mZgVy+JuZFcjhb2ZWIIe/mVmBOg1/SeMl3SNppaQVki7N5SMlLZL0eP45orLOFZJWSXpM0hmV8pMkLc/zrpekvdMtMzPrSDN3/juAj0TE8cAU4BJJE4HZwOKImAAsztPkeTOAE4BpwA2SBuW6bgRmARPya1ov9sXMzJrUafhHxIaIeCi/3wasBMYC04G5ebG5wFn5/XRgfkS8FBFPAquAkyWNAYZHxH0REcC8yjpmZtaHujTmL6kNeBNwP3BERGyA9AsCODwvNhZYW1ltXS4bm9/XltfbzixJSyUt3bx5c1eaaGZmTWg6/CUdDNwOXBYRWztatE5ZdFC+Z2HETRExOSImjx49utkmmplZk5oKf0lDSMH/tYi4IxdvzEM55J+bcvk6YHxl9XHA+lw+rk65mZn1sWae9hFwM7AyIj5XmbUQmJnfzwTurJTPkHSApGNIX+w+kIeGtkmakus8r7KOmZn1ocFNLHMKcC6wXNLDuexK4FpggaQLgDXA2QARsULSAuAR0pNCl0TEzrzeRcAtwFDg7vwyM7M+1mn4R8SPqD9eD3Bag3XmAHPqlC8FJnWlgWZm1vv8F75mZgVy+JuZFcjhb2ZWIIe/mVmBHP5mZgVy+JuZFcjhb2ZWIIe/mVmBHP5mZgVy+JuZFcjhb2ZWIIe/mVmBHP5mZgVy+JuZFcjhb2ZWIIe/mVmBHP5mZgVy+JuZFcjhb2ZWIIe/mVmBHP5mZgVy+JuZFcjhb2ZWIIe/mVmBHP5mZgVy+JuZFcjhb2ZWIIe/mVmBHP5mZgVy+JuZFcjhb2ZWIIe/mVmBHP5mZgVy+JuZFcjhb2ZWoE7DX9JXJG2S9NNK2UhJiyQ9nn+OqMy7QtIqSY9JOqNSfpKk5Xne9ZLU+90xM7NmNHPnfwswraZsNrA4IiYAi/M0kiYCM4AT8jo3SBqU17kRmAVMyK/aOs3MrI90Gv4RcS/wbE3xdGBufj8XOKtSPj8iXoqIJ4FVwMmSxgDDI+K+iAhgXmUdMzPrY90d8z8iIjYA5J+H5/KxwNrKcuty2dj8vra8LkmzJC2VtHTz5s3dbKKZmTXS21/41hvHjw7K64qImyJickRMHj16dK81zszMku6G/8Y8lEP+uSmXrwPGV5YbB6zP5ePqlJuZWT/obvgvBGbm9zOBOyvlMyQdIOkY0he7D+ShoW2SpuSnfM6rrGNmZn1scGcLSLoNmAqMkrQO+ARwLbBA0gXAGuBsgIhYIWkB8AiwA7gkInbmqi4iPTk0FLg7v8zMrB90Gv4RcU6DWac1WH4OMKdO+VJgUpdaZ2Zme4X/wtfMrEAOfzOzAjn8zcwK5PA3MyuQw9/MrEAOfzOzAjn8zcwK5
PA3MyuQw9/MrEAOfzOzAjn8zcwK5PA3MyuQw9/MrEAOfzOzAjn8zcwK5PA3MyuQw9/MrEAOfzOzAjn8zcwK5PA3MyuQw9/MrEAOfzOzAjn8zcwK5PA3MyuQw9/MrEAOfzOzAjn8zcwK5PA3MyuQw9/MrEAOfzOzAjn8zcwK5PA3MyuQw9/MrECD+7sB1rvaZn+7X7a7+tp39st2zax7fOdvZlagPg9/SdMkPSZplaTZfb19MzPr4/CXNAj4AvD7wETgHEkT+7INZmbW92P+JwOrIuIJAEnzgenAI33cDjOzprTq92h9Hf5jgbWV6XXAW2sXkjQLmJUnt0t6rJvbGwVs6ea63aZP77Wq+6U/zehBn/fZPnVTq/UHWq9PA6I/Xbym6vXp6I5W6OvwV52y2KMg4ibgph5vTFoaEZN7Ws++otX6A63Xp1brD7Ren1qtP9C9PvX1F77rgPGV6XHA+j5ug5lZ8fo6/B8EJkg6RtL+wAxgYR+3wcyseH067BMROyR9CPguMAj4SkSs2Iub7PHQ0T6m1foDrdenVusPtF6fWq0/0I0+KWKPIXczM2tx/gtfM7MCOfzNzArUkuHfiv9CQtJqScslPSxpaX+3pzskfUXSJkk/rZSNlLRI0uP554j+bGNXNOjPVZKeysfpYUln9mcbu0LSeEn3SFopaYWkS3P5QD5Gjfo0II+TpAMlPSDpx7k/V+fyLh+jlhvzz/9C4mfA75IeLX0QOCciBvRfEUtaDUyOiH3+j1MakfTbwHZgXkRMymWfAZ6NiGvzL+oREXF5f7azWQ36cxWwPSL+sT/b1h2SxgBjIuIhSYcAy4CzgPMZuMeoUZ/ewwA8TpIEDIuI7ZKGAD8CLgX+iC4eo1a883/1X0hExK+A9n8hYf0sIu4Fnq0png7Mze/nki7MAaFBfwasiNgQEQ/l99uAlaS/yh/Ix6hRnwakSLbnySH5FXTjGLVi+Nf7FxID9mBXBPA9Scvyv79oFUdExAZIFypweD+3pzd8SNJP8rDQgBkiqZLUBrwJuJ8WOUY1fYIBepwkDZL0MLAJWBQR3TpGrRj+Tf0LiQHolIh4M+k/ol6Shxxs33MjcBxwIrAB+Gy/tqYbJB0M3A5cFhFb+7s9vaFOnwbscYqInRFxIuk/JJwsaVJ36mnF8G/JfyEREevzz03Av5GGt1rBxjwu2z4+u6mf29MjEbExX5yvAF9igB2nPI58O/C1iLgjFw/oY1SvTwP9OAFExPPAEmAa3ThGrRj+LfcvJCQNy19WIWkY8HvATztea8BYCMzM72cCd/ZjW3qs/QLM/pABdJzyl4k3Aysj4nOVWQP2GDXq00A9TpJGSzosvx8KnA48SjeOUcs97QOQH9u6jl3/QmJO/7aoZyQdS7rbh/QvOb4+EPsk6TZgKunfz24EPgF8E1gAHAWsAc6OiAHxJWqD/kwlDSUEsBr4YPtY7L5O0tuAHwLLgVdy8ZWkMfKBeowa9ekcBuBxkvRG0he6g0g37wsi4pOSXkMXj1FLhr+ZmXWsFYd9zMysEw5/M7MCOfzNzArk8DczK5DD38ysQA5/M7MCOfzNzAr0v71I+3pG9UiwAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.hist(data[data['label']==0]['num_sensors'])\n", + "plt.title(\"Distribution of number of sensors used (Benign apps)\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# whether an app uses any sensors\n", + "data['use_sensors'] = data['num_sensors']>0" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "label\n", + "0 0.137392\n", + "1 0.186721\n", + "Name: use_sensors, dtype: float64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.groupby('label')['use_sensors'].mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let $p$ be the proportion of apps that use at least one sensor.\n", + "\n", + "$$H_0: p_1 = p_2 $$\n", + "\n", + "$$H_a: p_1 > p_2 $$\n", + "\n", + "$p_1$: malicious apps\n", + "\n", + "$p_2$: benign apps" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "z_stat: 7.405, p_value: 0.000\n", + "Reject the null hypothesis. 
Proportion of malicious apps using sensors is higher.\n" + ] + } + ], + "source": [ + "significance = 0.01  # significance level for the one-sided z-test\n", + "\n", + "sample_success_a, sample_size_a = (data[data['label']==1]['use_sensors'].sum(),  # True count; does not depend on value_counts() ordering\n", + " data[data['label']==1]['use_sensors'].shape[0])\n", + "sample_success_b, sample_size_b = (data[data['label']==0]['use_sensors'].sum(),  # True count; does not depend on value_counts() ordering\n", + " data[data['label']==0]['use_sensors'].shape[0])\n", + "\n", + "successes = np.array([sample_success_a, sample_success_b])\n", + "samples = np.array([sample_size_a, sample_size_b])\n", + "\n", + "stat, p_value = proportions_ztest(count = successes, nobs = samples, \n", + " alternative = 'larger')  # H_a: p_1 > p_2, with group a (label 1) listed first\n", + "\n", + "print('z_stat: %0.3f, p_value: %0.3f' % (stat, p_value))\n", + "if p_value > significance:\n", + " print(\"Fail to reject the null hypothesis\")\n", + "else:\n", + " print(\"Reject the null hypothesis. Proportion of malicious apps using sensors is higher.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "p1: 0.187\n", + "p2: 0.137\n" + ] + } + ], + "source": [ + "print('p1: %0.3f' % (sample_success_a / sample_size_a))\n", + "print('p2: %0.3f' % (sample_success_b / sample_size_b))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}