From e10541d3163c3c0dbf6d0b511433d80b6436a6f8 Mon Sep 17 00:00:00 2001 From: baniasbaabe Date: Sun, 4 Feb 2024 11:37:36 +0000 Subject: [PATCH] deploy: dccdefcd7ba1a7fa7861588a37cd8260448fdd59 --- _sources/book/cooltools/Chapter.ipynb | 39 ++++++++++++ .../machinelearning/outlierdetection.ipynb | 59 +++++++++++++++++++ _sources/book/pandas/Chapter.ipynb | 48 +++++++++++++++ _sources/book/pythontricks/Chapter.ipynb | 10 +++- book/cooltools/Chapter.html | 27 +++++++++ book/machinelearning/outlierdetection.html | 44 ++++++++++++++ book/pandas/Chapter.html | 30 ++++++++++ book/pythontricks/Chapter.html | 4 ++ searchindex.js | 2 +- 9 files changed, 261 insertions(+), 2 deletions(-) diff --git a/_sources/book/cooltools/Chapter.ipynb b/_sources/book/cooltools/Chapter.ipynb index feec4ec..ffea5a4 100644 --- a/_sources/book/cooltools/Chapter.ipynb +++ b/_sources/book/cooltools/Chapter.ipynb @@ -1665,6 +1665,45 @@ " }\n", " )" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## SQL Query Builder in Python" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can build SQL queries in Python with pypika.\n", + "\n", + "pypika provides a simple interface to build SQL queries with an easy syntax.\n", + "\n", + "It supports nearly every SQL command." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pypika import Tables, Query\n", + "\n", + "history, customers = Tables('history', 'customers')\n", + "q = Query \\\n", + " .from_(history) \\\n", + " .join(customers) \\\n", + " .on(history.customer_id == customers.id) \\\n", + " .select(history.star) \\\n", + " .where(customers.id == 5)\n", + " \n", + "q.get_sql()\n", + "# SELECT \"history\".* FROM \"history\" JOIN \"customers\" \n", + "# ON \"history\".\"customer_id\"=\"customers\".\"id\" WHERE \"customers\".\"id\"=5" + ] } ], "metadata": { diff --git a/_sources/book/machinelearning/outlierdetection.ipynb b/_sources/book/machinelearning/outlierdetection.ipynb index 1022f7e..3092056 100644 --- a/_sources/book/machinelearning/outlierdetection.ipynb +++ b/_sources/book/machinelearning/outlierdetection.ipynb @@ -83,6 +83,65 @@ " \n", "majority_vote(labels)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Robust Outlier Detection with `puncc`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Outlier Detection is notoriously hard.\n", + "\n", + "But it doesn't have to.\n", + "\n", + "`puncc` offers outlier detection, powered by Conformal Prediction, where the detection threshold will be calibrated.\n", + "\n", + "So, false alarms are reduced." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install puncc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.ensemble import IsolationForest\n", + "from deel.puncc.anomaly_detection import SplitCAD\n", + "from deel.puncc.api.prediction import BasePredictor\n", + "\n", + "# We need to redefine the predict to output the nonconformity scores.\n", + "class ADPredictor(BasePredictor):\n", + " def predict(self, X):\n", + " return -self.model.score_samples(X)\n", + "\n", + "# Wrap Isolation Forest in a predictor\n", + "if_predictor = ADPredictor(IsolationForest())\n", + "\n", + "# Instantiate CAD on top of IF predictor\n", + "if_cad = SplitCAD(if_predictor, train=True)\n", + "\n", + "\n", + "if_cad.fit(z=dataset, fit_ratio=0.7)\n", + "\n", + "# Maximum false detection rate\n", + "alpha = 0.01\n", + "\n", + "results = if_cad.predict(new_data, alpha=alpha)" + ] } ], "metadata": { diff --git a/_sources/book/pandas/Chapter.ipynb b/_sources/book/pandas/Chapter.ipynb index 4fead7e..aabb881 100644 --- a/_sources/book/pandas/Chapter.ipynb +++ b/_sources/book/pandas/Chapter.ipynb @@ -189,6 +189,54 @@ "data = {'Value': [1.2343129, 5.8956701, 6.224289]}\n", "df = pd.DataFrame(data)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Faster I/O with Parquet" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Whenever you work with bigger datasets, please avoid using CSV format (or similar).\n", + "\n", + "CSV files are text files, which are human-readable, and therefore a popular option to store data.\n", + "\n", + "For small datasets, this is not a big issue.\n", + "\n", + "But, what if your data has millions of rows?\n", + "\n", + "It can get really slow to do read/write operations on them.\n", + "\n", + "On the other side, binary files exist too.\n", + "\n", + "They consist of 0s 
and 1s and are not meant to be human-readable but to be used by programs that know how to interpret them.\n", + "\n", + "Because of that, binary files are more compact and consume less space.\n", + "\n", + "Parquet is one popular binary file format, which is more space-efficient than CSVs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "# Shape: (100000000, 5)\n", + "df = pd.DataFrame(...)\n", + "\n", + "# Time: 1m 58s\n", + "df.to_csv(\"data.csv\")\n", + "\n", + "# Time: 8s\n", + "df.to_parquet(\"data.parquet\")" + ] } ], "metadata": { diff --git a/_sources/book/pythontricks/Chapter.ipynb b/_sources/book/pythontricks/Chapter.ipynb index 0e64bb5..3b1ef79 100644 --- a/_sources/book/pythontricks/Chapter.ipynb +++ b/_sources/book/pythontricks/Chapter.ipynb @@ -961,7 +961,15 @@ { "cell_type": "markdown", "metadata": {}, - "source": [] + "source": [ + "One cool feature in Python 3.12:\n", + "\n", + "The new built-in syntax for Type Variables.\n", + "\n", + "You can use them to parametrize generic classes and functions.\n", + "\n", + "See below for a small example where our generic class is parametrized by T which we indicate with [T]." + ] }, { "cell_type": "code", diff --git a/book/cooltools/Chapter.html b/book/cooltools/Chapter.html index 9eb3d65..e06f1fd 100644 --- a/book/cooltools/Chapter.html +++ b/book/cooltools/Chapter.html @@ -449,6 +449,7 @@

Contents

  • 2.1.32. Better Alternative to requests
  • 2.1.33. Managing Configurations with python-dotenv
  • 2.1.34. Work with Notion via Python with
  • +
  • 2.1.35. SQL Query Builder in Python
  • @@ -1462,6 +1463,31 @@

    2.1.34. Work with Notion via Python with +
    +

    2.1.35. SQL Query Builder in Python#

    +

    You can build SQL queries in Python with pypika.

    +

    pypika provides a simple interface to build SQL queries with an easy syntax.

    +

    It supports nearly every SQL command.

    +
    +
    +
    from pypika import Tables, Query
    +
    +history, customers = Tables('history', 'customers')
    +q = Query \
    +    .from_(history) \
    +    .join(customers) \
    +    .on(history.customer_id == customers.id) \
    +    .select(history.star) \
    +    .where(customers.id == 5)
    +    
    +q.get_sql()
    +# SELECT "history".* FROM "history" JOIN "customers" 
    +# ON "history"."customer_id"="customers"."id" WHERE "customers"."id"=5
    +
    +
    +
    +
    +