diff --git a/book/machinelearning/outlierdetection.ipynb b/book/machinelearning/outlierdetection.ipynb
index 1022f7e..3092056 100644
--- a/book/machinelearning/outlierdetection.ipynb
+++ b/book/machinelearning/outlierdetection.ipynb
@@ -83,6 +83,65 @@
     " \n",
     "majority_vote(labels)"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Robust Outlier Detection with `puncc`"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Outlier Detection is notoriously hard.\n",
+    "\n",
+    "But it doesn't have to be.\n",
+    "\n",
+    "`puncc` offers outlier detection, powered by Conformal Prediction, where the detection threshold is calibrated on a held-out split of the data.\n",
+    "\n",
+    "So, the false alarm rate stays below the level you choose."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install puncc"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.ensemble import IsolationForest\n",
+    "from deel.puncc.anomaly_detection import SplitCAD\n",
+    "from deel.puncc.api.prediction import BasePredictor\n",
+    "\n",
+    "# We need to redefine predict so it outputs the nonconformity scores.\n",
+    "class ADPredictor(BasePredictor):\n",
+    "    def predict(self, X):\n",
+    "        return -self.model.score_samples(X)\n",
+    "\n",
+    "# Wrap Isolation Forest in a predictor\n",
+    "if_predictor = ADPredictor(IsolationForest())\n",
+    "\n",
+    "# Instantiate conformal anomaly detection (CAD) on top of the IF predictor\n",
+    "if_cad = SplitCAD(if_predictor, train=True)\n",
+    "\n",
+    "# Fit on 70% of the data, calibrate the threshold on the remaining 30%\n",
+    "if_cad.fit(z=dataset, fit_ratio=0.7)\n",
+    "\n",
+    "# Maximum false detection rate\n",
+    "alpha = 0.01\n",
+    "\n",
+    "results = if_cad.predict(new_data, alpha=alpha)"
+   ]
   }
  ],
  "metadata": {