diff --git a/book/cooltools/Chapter.ipynb b/book/cooltools/Chapter.ipynb index 556656a..6838645 100644 --- a/book/cooltools/Chapter.ipynb +++ b/book/cooltools/Chapter.ipynb @@ -2207,6 +2207,67 @@ "def transform(prompt: str, history: list[mel.ChatMessage]) -> str:\n", " return \"Hello \" + prompt" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Anonymize PII Data with `presidio`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Working with personally identifiable information (PII) can get you into serious trouble if it is handled carelessly.\n", + "\n", + "Luckily, for fast anonymization, you can use `presidio`.\n", + "\n", + "`presidio` handles anonymization of common entities like names, phone numbers, credit card numbers, or Bitcoin wallet addresses.\n", + "\n", + "It can even handle text in images!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install presidio_analyzer presidio_anonymizer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!python -m spacy download en_core_web_lg" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from presidio_analyzer import AnalyzerEngine\n", + "from presidio_anonymizer import AnonymizerEngine\n", + "\n", + "text_to_anonymize = \"His name is Mr. Jones. His phone number is 212-555-5555.\"\n", + "\n", + "analyzer = AnalyzerEngine()\n", + "results = analyzer.analyze(text=text_to_anonymize, entities=[\"PHONE_NUMBER\", \"PERSON\"], language='en')\n", + "\n", + "anonymizer = AnonymizerEngine()\n", + "\n", + "anonymized_text = anonymizer.anonymize(text=text_to_anonymize, analyzer_results=results)\n", + "\n", + "print(anonymized_text)\n", + "\n", + "# Output: His name is Mr. <PERSON>. His phone number is <PHONE_NUMBER>." + ] } ], "metadata": {