From ef602d9d00a600fcc9edd5a9b3f100b69ead8a5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jordan=20Fr=C3=A9ry?= Date: Thu, 19 Dec 2024 16:33:50 +0100 Subject: [PATCH] feat: add lora fine tuning for llama 3.2 (#958) --- .github/workflows/ci_timing.yaml | 2 +- .github/workflows/refresh-one-notebook.yaml | 2 + .github/workflows/release.yaml | 4 +- .../DecisionTreeRegressor.ipynb | 18 +- docs/advanced_examples/LinearSVR.ipynb | 4 +- docs/advanced_examples/LoraMLP.ipynb | 307 +- .../RegressorComparison.ipynb | 8 +- docs/advanced_examples/SVMClassifier.ipynb | 36 +- docs/advanced_examples/aggregated_code.txt | 5248 +++++++++++++++++ docs/conventions.md | 2 +- docs/deep-learning/lora_training.md | 122 +- ...oncrete.ml.common.serialization.encoder.md | 2 +- script/doc_utils/check_forbidden_words.py | 2 + .../ml/common/serialization/encoder.py | 2 +- src/concrete/ml/sklearn/glm.py | 8 +- src/concrete/ml/sklearn/neighbors.py | 2 +- src/concrete/ml/sklearn/qnn.py | 8 +- src/concrete/ml/sklearn/rf.py | 8 +- src/concrete/ml/sklearn/svm.py | 8 +- src/concrete/ml/sklearn/tree.py | 8 +- src/concrete/ml/sklearn/xgb.py | 8 +- .../ml/torch/hybrid_backprop_linear.py | 116 + src/concrete/ml/torch/hybrid_model.py | 7 +- src/concrete/ml/torch/lora.py | 480 +- tests/torch/test_lora.py | 876 +-- .../lora_finetuning/GPT2FineTuneHybrid.ipynb | 53 +- .../lora_finetuning/LLamaFineTuning.ipynb | 345 ++ use_case_examples/lora_finetuning/Makefile | 3 + use_case_examples/lora_finetuning/README.md | 46 +- .../data_finetune/dataset.jsonl | 46 + .../data_finetune/raw_cml_1.7.0_examples.txt | 458 ++ .../lora_finetuning/requirements.txt | 11 +- .../lora_finetuning/scripts/create_dataset.py | 109 + .../lora_finetuning/utils_lora.py | 34 +- 34 files changed, 7396 insertions(+), 997 deletions(-) create mode 100644 docs/advanced_examples/aggregated_code.txt create mode 100644 src/concrete/ml/torch/hybrid_backprop_linear.py create mode 100644 use_case_examples/lora_finetuning/LLamaFineTuning.ipynb create mode 100644 use_case_examples/lora_finetuning/data_finetune/dataset.jsonl create mode 100644 use_case_examples/lora_finetuning/data_finetune/raw_cml_1.7.0_examples.txt create mode 100644 use_case_examples/lora_finetuning/scripts/create_dataset.py diff --git a/.github/workflows/ci_timing.yaml b/.github/workflows/ci_timing.yaml index 0e20cbf70..a48a6dad7 100644 --- a/.github/workflows/ci_timing.yaml +++ b/.github/workflows/ci_timing.yaml @@ -1,4 +1,4 @@ -# This workflow uses GitHub CLI to get timings of last 50 runs of Concrete-ML main CI +# This workflow uses GitHub CLI to get timings of last 50 runs of Concrete ML main CI # and send it to slack and add it as an artifact on the workflow name: CML build time on: diff --git a/.github/workflows/refresh-one-notebook.yaml b/.github/workflows/refresh-one-notebook.yaml index 3713dadf8..96f4107b9 100644 --- a/.github/workflows/refresh-one-notebook.yaml +++ b/.github/workflows/refresh-one-notebook.yaml @@ -28,6 +28,7 @@ on: - KNearestNeighbors \n - LinearRegression \n - LinearSVR \n + - LLamaFineTuning \n - LogisticRegression \n - LogisticRegressionTraining \n - LoraMLP \n @@ -76,6 +77,7 @@ env: KNearestNeighbors: "docs/advanced_examples/KNearestNeighbors.ipynb" LinearRegression: "docs/advanced_examples/LinearRegression.ipynb" LinearSVR: "docs/advanced_examples/LinearSVR.ipynb" + LLamaFineTuning: "use_case_examples/lora_finetuning/LLamaFineTuning.ipynb" LogisticRegression: "docs/advanced_examples/LogisticRegression.ipynb" LogisticRegressionTraining: 
"docs/advanced_examples/LogisticRegressionTraining.ipynb" LoraMLP: "docs/advanced_examples/LoraMLP.ipynb" diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 90c3b7c07..c96c543ea 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -288,7 +288,7 @@ jobs: tags: true # This action creates docker and pypi images directly on the AWS EC2 instance - # The 'PRIVATE_RELEASE_IMAGE_BASE' variable is kept here in case Concrete-ML starts to publish + # The 'PRIVATE_RELEASE_IMAGE_BASE' variable is kept here in case Concrete ML starts to publish # private nightly releases one day. Currently, release candidates and actual releases are all # done through the 'PUBLIC_RELEASE_IMAGE_BASE' image. The private image is also used to list all # tags easily @@ -471,7 +471,7 @@ jobs: echo "" >> "${SECRETS_FILE}" echo "SECRETS_FILE=${SECRETS_FILE}" >> "$GITHUB_ENV" - - name: Build Docker Concrete-ML Image + - name: Build Docker Concrete ML Image if: ${{ success() && !cancelled() }} uses: docker/build-push-action@48aba3b46d1b1fec4febb7c5d0c644b249a11355 with: diff --git a/docs/advanced_examples/DecisionTreeRegressor.ipynb b/docs/advanced_examples/DecisionTreeRegressor.ipynb index 82aa6dd3c..29b1371e1 100644 --- a/docs/advanced_examples/DecisionTreeRegressor.ipynb +++ b/docs/advanced_examples/DecisionTreeRegressor.ipynb @@ -6,9 +6,9 @@ "id": "5755bc04", "metadata": {}, "source": [ - "# Decision Tree Regression Using Concrete-ML\n", + "# Decision Tree Regression Using Concrete ML\n", "\n", - "In this tutorial, we show how to create, train and evaluate a decision tree regression model using Concrete-ML library.\n", + "In this tutorial, we show how to create, train and evaluate a decision tree regression model using Concrete ML library.\n", "\n" ] }, @@ -18,16 +18,16 @@ "id": "2c256087-c16a-4249-9c90-3f4863938385", "metadata": {}, "source": [ - "### Introducing Concrete-ML\n", + "### Introducing Concrete ML\n", "\n", - "> Concrete-ML is an open-source, privacy-preserving, machine learning inference framework based on fully homomorphic encryption (FHE).\n", + "> Concrete ML is an open-source, privacy-preserving, machine learning inference framework based on fully homomorphic encryption (FHE).\n", "> It enables data scientists without any prior knowledge of cryptography to automatically turn machine learning models into their FHE equivalent,using familiar APIs from Scikit-learn and PyTorch.\n", "> — [Zama documentation](../README.md)\n", "\n", "This tutorial does not require a deep understanding of the technology behind concrete-ML.\n", "Nonetheless, newcomers might be interested in reading introductory sections of the official documentation such as:\n", "\n", - "- [What is Concrete-ML](../README.md)\n", + "- [What is Concrete ML](../README.md)\n", "- [Key Concepts](../getting-started/concepts.md)\n", "\n", "In the tutorial, we will be using the following terminology:\n", @@ -233,10 +233,10 @@ "source": [ "## Training A Decision Tree\n", "\n", - "ConcreteDecisionTreeRegressor is the Concrete-ML equivalent of scikit-learn's DecisionTreeRegressor.\n", + "ConcreteDecisionTreeRegressor is the Concrete ML equivalent of scikit-learn's DecisionTreeRegressor.\n", "It supports the same parameters and a similar interface, with the extra capability of predicting directly on ciphertext without the need to decipher it, thus preservacy privacy.\n", "\n", - "Currently, Concrete-ML models must be trained on plaintext. 
To see how it works, we train a DecisionTreeRegressor with default parameters and estimate its accuracy on test data. Note here that predictions are done on plaintext too, but soon, we will predict on ciphertext." + "Currently, Concrete ML models must be trained on plaintext. To see how it works, we train a DecisionTreeRegressor with default parameters and estimate its accuracy on test data. Note here that predictions are done on plaintext too, but soon, we will predict on ciphertext." ] }, { @@ -479,7 +479,7 @@ "source": [ "## Predicting on Ciphertext\n", "If the predictions are similar, although slightly less accurate, the real advantage of Concrete ML is privacy.\n", - "We now show how we can perform prediction on ciphertext with Concrete-ML, so that the model does not need to decipher the data at all to compute its estimate." + "We now show how we can perform prediction on ciphertext with Concrete ML, so that the model does not need to decipher the data at all to compute its estimate." ] }, { @@ -798,7 +798,7 @@ "Once the model is carefully trained and quantized, it is ready to be deployed and used in production. Here are some useful links on the subject:\n", " \n", " - [Inference in the Cloud](../getting-started/cloud.md) summarizes the steps for cloud deployment\n", - " - [Production Deployment](../guides/client_server.md) offers a high-level view of how to deploy a Concrete-ML model in a client/server setting.\n", + " - [Production Deployment](../guides/client_server.md) offers a high-level view of how to deploy a Concrete ML model in a client/server setting.\n", " - [Client Server in Concrete ML](./ClientServer.ipynb) provides a more hands-on approach as another tutorial." ] } diff --git a/docs/advanced_examples/LinearSVR.ipynb b/docs/advanced_examples/LinearSVR.ipynb index 7be591052..00b91e8f3 100644 --- a/docs/advanced_examples/LinearSVR.ipynb +++ b/docs/advanced_examples/LinearSVR.ipynb @@ -88,7 +88,7 @@ "\n", "\n", "def get_concrete_plot_config(mse_score=None):\n", - " label = \"Concrete-ML\"\n", + " label = \"Concrete ML\"\n", " if mse_score is not None:\n", " label += f\", {'$MSE$'}={mse_score:.4f}\"\n", " return {\"c\": \"orange\", \"linewidth\": 2.5, \"label\": label}" @@ -646,7 +646,7 @@ "y_pred_sklearn = sklearn_rgs.predict(X_test)\n", "print(f\"Execution time: {(time.time() - time_begin) / len(X_test):.4f} seconds per sample\")\n", "\n", - "# Now predict using clear quantized Concrete-ML model on testing set\n", + "# Now predict using clear quantized Concrete ML model on testing set\n", "time_begin = time.time()\n", "y_preds_quantized = concrete_rgs.predict(X_test)\n", "print(f\"Execution time: {(time.time() - time_begin) / len(X_test):.4f} seconds per sample\")" diff --git a/docs/advanced_examples/LoraMLP.ipynb b/docs/advanced_examples/LoraMLP.ipynb index 7b6dc6e7c..8a5dfd169 100644 --- a/docs/advanced_examples/LoraMLP.ipynb +++ b/docs/advanced_examples/LoraMLP.ipynb @@ -8,30 +8,18 @@ "\n", "This notebook demonstrates encrypted fine-tuning of a small MLP model with LoRA. A model trained on an initial dataset is adapted to a second dataset using LoRA fine-tuning.\n", "\n", - "The fine-tuning dataset and the LoRA weights that are trained are protected using encryption. Thus, the training can be outsourced to a remote server without leaking any sensitive data.\n", + "The fine-tuning dataset and the trained LoRA weights are protected using encryption. 
Thus, training can be securely outsourced to a remote server without compromising any sensitive data.\n", "\n", - "The hybrid model approach is applied to fine-tuning: only the linear layers of the original model are outsourced to the server. The forward and backward passes on these original weights are performed with encrypted activations and gradients. The LoRA weights are kept by the client, and the client performs the forward and backward passes on the LoRA weights." + "The hybrid approach is applied to fine-tuning: only the linear layers of the original model are outsourced to the server. The forward and backward passes on these layers are performed using encrypted activations and gradients. Meanwhile, the LoRA weights are kept by the client, which locally performs the forward and backward passes on the LoRA weights." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import shutil\n", - "import time\n", "from pathlib import Path\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import torch\n", "from peft import LoraConfig, get_peft_model\n", "from sklearn.datasets import make_circles, make_moons\n", "from torch import nn, optim\n", "from torch.utils.data import DataLoader, TensorDataset\n", - "from tqdm import tqdm\n", "\n", - "from concrete.ml.torch.hybrid_model import HybridFHEModel\n", - "from concrete.ml.torch.lora import LoraTraining, get_remote_names\n", + "from concrete.ml.torch.lora import LoraTrainer\n", "\n", "# Set random seed for reproducibility\n", "SEED = 42\n", "np.random.seed(SEED)\n", - "torch.manual_seed(SEED)" + "torch.manual_seed(SEED);" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Data preparation\n", "\n", - "Two datasets are generated: one for the original training, and a second one on which LORA fine-tuning is performed." + "Two datasets are generated: one for the original training, and a second one on which LoRA fine-tuning is performed." 
] }, { @@ -78,7 +64,7 @@ } ], "source": [ - "# Task 1: Two interleaving half circles (make_moons)\n", + "# Task 1: Two interleaving half circles\n", "X_task1, y_task1 = make_moons(n_samples=500, noise=0.1)\n", "# Task 2: Two concentric circles\n", "X_task2, y_task2 = make_circles(n_samples=500, noise=0.2, factor=0.5)\n", @@ -132,13 +118,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training on Task 1 without LoRA:\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "Training on Task 1 without LoRA:\n", "Epoch [20/20], Loss: 0.0036\n" ] }, @@ -269,32 +249,33 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Setup FHE fine-tuning with LoraTraining and HybridFHEModel" + "## Setup FHE fine-tuning with LoraTrainer" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LoRA layers detected in the model.\n" + ] + } + ], "source": [ - "# Set up LoRA training\n", - "lora_training = LoraTraining(peft_model)\n", - "\n", - "# Set up optimizer and scheduler\n", + "# Update training parameters, including loss function\n", "optimizer = optim.Adam(filter(lambda p: p.requires_grad, peft_model.parameters()), lr=0.01)\n", + "loss_fn = nn.CrossEntropyLoss()\n", + "training_args = {\"gradient_accumulation_steps\": 1}\n", "\n", - "# Update training parameters, including loss function\n", - "lora_training.update_training_parameters(\n", - " optimizer=optimizer,\n", - " loss_fn=nn.CrossEntropyLoss(),\n", - " training_args={\"gradient_accumulation_steps\": 1},\n", + "# Set up LoRA training\n", + "lora_trainer = LoraTrainer(\n", + " peft_model, optimizer=optimizer, loss_fn=loss_fn, training_args=training_args\n", ")\n", "\n", - "# Create the HybridFHEModel\n", - "remote_names = get_remote_names(lora_training)\n", - "hybrid_model = HybridFHEModel(lora_training, module_names=remote_names)\n", - "\n", "# Prepare input data for calibration\n", "batch_size_per_task = batch_size // 2\n", "inputset = (\n", @@ -302,10 +283,8 @@ " torch.cat([y_task1[:batch_size_per_task], y_task2[:batch_size_per_task]]),\n", ")\n", "\n", - "# Calibrate and compile the model\n", - "lora_training.toggle_calibrate(enable=True)\n", - "hybrid_model.compile_model(inputset, n_bits=8)\n", - "lora_training.toggle_calibrate(enable=False)" + "# Compile the model\n", + "lora_trainer.compile(inputset, n_bits=8)" ] }, { @@ -313,187 +292,11 @@ "execution_count": 6, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fine-tuning on Task 2 with LoRA:\n" - ] - }, { "name": "stderr", "output_type": "stream", "text": [ - "\r", - "Training: 0%| | 0/10 [00:00 Concrete-ML is an open-source, privacy-preserving, machine learning inference framework based on fully homomorphic encryption (FHE). It enables data scientists without any prior knowledge of cryptography to automatically turn machine learning models into their FHE equivalent, using familiar APIs from Scikit-learn and PyTorch.\n", + "> Concrete ML is an open-source, privacy-preserving, machine learning inference framework based on fully homomorphic encryption (FHE). It enables data scientists without any prior knowledge of cryptography to automatically turn machine learning models into their FHE equivalent, using familiar APIs from Scikit-learn and PyTorch.\n", "> \n", "> — [Zama documentation](../README.md)\n", "\n", - "This tutorial does not require any knowledge of Concrete-ML. 
Newcomers might nonetheless be interested in reading some of the introductory sections of the official documentation, such as:\n", + "This tutorial does not require any knowledge of Concrete ML. Newcomers might nonetheless be interested in reading some of the introductory sections of the official documentation, such as:\n", "\n", - "- [What is Concrete-ML](../README.md)\n", + "- [What is Concrete ML](../README.md)\n", "- [Key Concepts](../getting-started/concepts.md)\n", "\n", "### Support Vector Machine\n", @@ -46,7 +46,7 @@ "SVM is a machine learning algorithm for classification and regression. LinearSVC is an efficient implementation of SVM\n", "that works best when the data is linearly separable. In this tutorial, we use the [pulsar star dataset](https://www.kaggle.com/datasets/colearninglounge/predicting-pulsar-starintermediate) to determine whether a neutron star can be classified as a pulsar star.\n", "\n", - "Concrete-ML exposes a LinearSVC class which implements the\n", + "Concrete ML exposes a LinearSVC class which implements the\n", "[scikit-learn LinearSVC](https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html) interface, so you should feel right at home.\n", "\n", "### Setup code\n", @@ -342,9 +342,9 @@ "id": "12e827d0", "metadata": {}, "source": [ - "## Part 1: Train a simple model with Concrete-ML\n", + "## Part 1: Train a simple model with Concrete ML\n", "\n", - "The following code quickly scaffolds a Concrete-ML LinearSVC code, which should sound familiar.\n" + "The following code quickly scaffolds a Concrete ML LinearSVC model, which should sound familiar.\n" ] }, { @@ -403,7 +403,7 @@ } ], "source": [ - "# Perform the same steps with the Concrete-ML LinearSVC implementation\n", + "# Perform the same steps with the Concrete ML LinearSVC implementation\n", "svm_concrete = ConcreteLinearSVC(max_iter=100, n_bits=8)\n", "svm_concrete.fit(X_train, y_train)\n", "# plot the boundary\n", @@ -468,15 +468,15 @@ "\n", "#### Simplicity of execution\n", "\n", - "For a high-level use-case, Concrete-ML offers a very similar interface to scikit-learn. The main difference is *a model needs to be compiled to allow execution in FHE*.\n", + "For a high-level use-case, Concrete ML offers a very similar interface to scikit-learn. The main difference is *a model needs to be compiled to allow execution in FHE*.\n", "\n", "#### Model Accuracy\n", "\n", - "Concrete-ML prediction accuracy can be slightly worse than a regular scikit-learn implementation. This is because of [quantization](../explanations/quantization.md): number precision needs to be fixed-size for the model to be evaluated in FHE. This can be alleviated down to where the accuracy difference is none or negligible (which is the case here with a 8 bit size).\n", + "Concrete ML prediction accuracy can be slightly worse than a regular scikit-learn implementation. This is because of [quantization](../explanations/quantization.md): number precision needs to be fixed-size for the model to be evaluated in FHE. This can be mitigated to the point where the accuracy difference is none or negligible (which is the case here with an 8-bit size).\n", "\n", "#### Execution time\n", "\n", - "The execution speed can be slower in Concrete-ML, especially during compilation and FHE inference phases, because enabling FHE operations uses more resources than regular inference on plain data. However, the speed can be improved by decreasing the precision of the data and model's weights thanks to the n_bits parameter. 
But, depending on the project, there is a trade-off between a slower but more accurate model and a faster but less accurate model." + "The execution speed can be slower in Concrete ML, especially during compilation and FHE inference phases, because enabling FHE operations uses more resources than regular inference on plain data. However, the speed can be improved by decreasing the precision of the data and model's weights thanks to the n_bits parameter. But, depending on the project, there is a trade-off between a slower but more accurate model and a faster but less accurate model." ] }, { @@ -536,7 +536,7 @@ "\n", "### Step b: quantize the model\n", "\n", - "So far most of Concrete-ML specificities have conveniently been avoided for the sake of simplicity. The first Concrete-ML specific step of developping a model is to quantize it, which soberly means to turn the model into an integer equivalent.\n", + "So far most of Concrete ML's specificities have conveniently been avoided for the sake of simplicity. The first Concrete ML-specific step of developing a model is to quantize it, which simply means to turn the model into an integer equivalent.\n", "\n", "Although it is strongly encouraged to read the [Zama introduction to quantization](../explanations/quantization.md), the key takeaway is **a model needs to be reduced to a *discrete*, smaller set in order for the encryption to happen**. Otherwise the data becomes too large to be manipulated in FHE. \n", "\n", @@ -764,7 +764,7 @@ "- the model itself\n", "- the hardware executing the model\n", "\n", - "Setting up a model in Concrete-ML requires some additional work compared to standard models. For instance, users must select the quantization bit-width for both the model's weight and input data, which can be complex and time-consuming while using real FHE inference. However, Concrete-ML provides an FHE simulation mode that allows users to identify optimal hyper-parameters with the best trade-off between latency and performance.\n", + "Setting up a model in Concrete ML requires some additional work compared to standard models. For instance, users must select the quantization bit-width for both the model's weights and input data, which can be complex and time-consuming while using real FHE inference. However, Concrete ML provides an FHE simulation mode that allows users to identify optimal hyper-parameters with the best trade-off between latency and performance.\n", "\n", "> Testing FHE models on very large data-sets can take a long time. Furthermore, not all models are compatible with FHE constraints out-of-the-box. Simulation using the FHE simulation allows you to execute a model that was quantized, to measure the accuracy it would have in FHE, but also to determine the modifications required to make it FHE compatible.\n", ">\n", @@ -849,13 +849,13 @@ "source": [ "## Conclusion\n", "\n", - "Setting up FHE with Concrete-ML on a LinearSVC model is very simple, in the regard that Concrete-ML provides an implementation of the [scikit-learn LinearSVC interface](https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html). As a matter of fact, a working FHE model can be setup with just a few lines of code.\n", + "Setting up FHE with Concrete ML on a LinearSVC model is very simple, in that Concrete ML provides an implementation of the [scikit-learn LinearSVC interface](https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html). 
As a matter of fact, a working FHE model can be set up with just a few lines of code.\n", "\n", "Setting up a model with FHE nonetheless benefits from some additional work. For LinearSVC models, the main point is to select a relevant bit-size for [quantizing](../explanations/quantization.md) the model. Some additional tools can smooth out the development workflow, such as alleviating the [compilation](../explanations/compilation.md) time by making use of the [FHE simulation](../explanations/compilation.md#fhe-simulation).\n", "\n", "Once the model is carefully trained and quantized, it is ready to be deployed and used in production. Here are some useful links that cover this subject:\n", "- [Inference in the Cloud](../getting-started/cloud.md) summarizes the steps for cloud deployment\n", - "- [Production Deployment](../guides/client_server.md) offers a high-level view of how to deploy a Concrete-ML model in a client/server setting.\n", + "- [Production Deployment](../guides/client_server.md) offers a high-level view of how to deploy a Concrete ML model in a client/server setting.\n", "- [Client Server in Concrete ML](ClientServer.ipynb) provides a more hands-on approach as another tutorial." ] } diff --git a/docs/advanced_examples/aggregated_code.txt b/docs/advanced_examples/aggregated_code.txt new file mode 100644 index 000000000..c3501acad --- /dev/null +++ b/docs/advanced_examples/aggregated_code.txt @@ -0,0 +1,5248 @@ + + +# Code from: ./ClientServer.ipynb +-------------------------------------------------------------------------------- + +import platform +import time +from shutil import copyfile +from tempfile import TemporaryDirectory + +import numpy +from sklearn.datasets import load_breast_cancer + +from concrete.ml.deployment import FHEModelClient, FHEModelDev, FHEModelServer +from concrete.ml.sklearn import XGBClassifier + +class OnDiskNetwork: + """Simulate a network on disk.""" + + def __init__(self): + # Create 3 temporary folders for server, client and dev with tempfile + self.server_dir = TemporaryDirectory() # pylint: disable=consider-using-with + self.client_dir = TemporaryDirectory() # pylint: disable=consider-using-with + self.dev_dir = TemporaryDirectory() # pylint: disable=consider-using-with + + def client_send_evaluation_key_to_server(self, serialized_evaluation_keys): + """Send the evaluation keys to the server.""" + with open(self.server_dir.name + "/serialized_evaluation_keys.ekl", "wb") as f: + f.write(serialized_evaluation_keys) + + def client_send_input_to_server_for_prediction(self, encrypted_input): + """Send the input to the server and execute on the server in FHE.""" + with open(self.server_dir.name + "/serialized_evaluation_keys.ekl", "rb") as f: + serialized_evaluation_keys = f.read() + time_begin = time.time() + encrypted_prediction = FHEModelServer(self.server_dir.name).run( + encrypted_input, serialized_evaluation_keys + ) + time_end = time.time() + with open(self.server_dir.name + "/encrypted_prediction.enc", "wb") as f: + f.write(encrypted_prediction) + return time_end - time_begin + + def dev_send_model_to_server(self): + """Send the model to the server.""" + copyfile(self.dev_dir.name + "/server.zip", self.server_dir.name + "/server.zip") + + def server_send_encrypted_prediction_to_client(self): + """Send the encrypted prediction to the client.""" + with open(self.server_dir.name + "/encrypted_prediction.enc", "rb") as f: + encrypted_prediction = f.read() + return encrypted_prediction + + def dev_send_clientspecs_and_modelspecs_to_client(self): + """Send 
the clientspecs and evaluation key to the client.""" + copyfile(self.dev_dir.name + "/client.zip", self.client_dir.name + "/client.zip") + + def cleanup(self): + """Clean up the temporary folders.""" + self.server_dir.cleanup() + self.client_dir.cleanup() + self.dev_dir.cleanup() + +from concrete.compiler import check_gpu_available + +# Let's first get some data and train a model. +X, y = load_breast_cancer(return_X_y=True) + +# Split X into X_model_owner and X_client +X_model_owner, X_client = X[:-10], X[-10:] +y_model_owner, y_client = y[:-10], y[-10:] + +# Some issues on macOS, if too many estimators +n_estimators = 10 +if platform.system() == "Darwin": + n_estimators = 9 + + +use_gpu_if_available = False +device = "cuda" if use_gpu_if_available and check_gpu_available() else "cpu" + +# Train the model and compile it +model_dev = XGBClassifier(n_bits=2, n_estimators=n_estimators, max_depth=3) +model_dev.fit(X_model_owner, y_model_owner) +model_dev.compile(X_model_owner, device=device) + +print("Model trained and compiled.") + +# Let's instantiate the network +network = OnDiskNetwork() + +# Now that the model has been trained, we want to save it to send it to a server +fhemodel_dev = FHEModelDev(network.dev_dir.name, model_dev) +fhemodel_dev.save() + +# Print all files in the temporary directory along with their sizes in KB +!ls -lh $network.dev_dir.name + +# Let's send the model to the server +network.dev_send_model_to_server() +!ls -lh $network.server_dir.name + +# Let's send the clientspecs and evaluation key to the client +network.dev_send_clientspecs_and_modelspecs_to_client() +!ls -lh $network.client_dir.name + +# Let's create the client and load the model +fhemodel_client = FHEModelClient(network.client_dir.name, key_dir=network.client_dir.name) + +# The client first needs to create the private and evaluation keys. +serialized_evaluation_keys = fhemodel_client.get_serialized_evaluation_keys() + +# Evaluation keys can be quite large files but only have to be shared once with the server. 
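+ +# --- Editor's sketch (not part of the original notebook) --- +# OnDiskNetwork above only simulates the client/server exchange on disk. Over a real +# network, this one-time upload of the evaluation keys could be a plain HTTP POST, +# using only the FHEModelClient calls shown above. The endpoint URL parameter and the +# requests dependency are illustrative assumptions, not part of the Concrete ML API. +def send_evaluation_keys_over_http(key_upload_url, serialized_keys): + """Upload the serialized evaluation keys once, before any encrypted inference.""" + import requests # third-party HTTP client, assumed available + + response = requests.post(key_upload_url, data=serialized_keys, timeout=60) + response.raise_for_status()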
+ +# Check the size of the evaluation keys (in MB) +print(f"Evaluation keys size: {len(serialized_evaluation_keys) / (10**6):.2f} MB") + +# Let's send this evaluation key to the server (this has to be done only once) +network.client_send_evaluation_key_to_server(serialized_evaluation_keys) + +# Now we have everything for the client to interact with the server + +# We create a loop to send the input to the server and receive the encrypted prediction +decrypted_predictions = [] +execution_time = [] +for i in range(X_client.shape[0]): + clear_input = X_client[[i], :] + encrypted_input = fhemodel_client.quantize_encrypt_serialize(clear_input) + execution_time += [network.client_send_input_to_server_for_prediction(encrypted_input)] + encrypted_prediction = network.server_send_encrypted_prediction_to_client() + decrypted_prediction = fhemodel_client.deserialize_decrypt_dequantize(encrypted_prediction)[0] + decrypted_predictions.append(decrypted_prediction) + +# Check the size (in MB) of the encrypted data vs the clear data +print( + f"Encrypted data is " + f"{len(encrypted_input)/clear_input.nbytes:.2f}" + " times larger than the clear data" +) + +# Show execution time +print(f"The average execution time is {numpy.mean(execution_time):.2f} seconds per sample.") + +# Let's check the results and compare them against the clear model +clear_prediction_classes = model_dev.predict_proba(X_client).argmax(axis=1) +decrypted_predictions_classes = numpy.array(decrypted_predictions).argmax(axis=1) +accuracy = (clear_prediction_classes == decrypted_predictions_classes).mean() +print(f"Accuracy between FHE prediction and clear model is: {accuracy*100:.0f}%") + + + +# Code from: ./FullyConnectedNeuralNetworkOnMNIST.ipynb +-------------------------------------------------------------------------------- + +import time + +import matplotlib.pyplot as plt +import numpy as np +from concrete.compiler import check_gpu_available +from joblib import Memory +from sklearn.datasets import fetch_openml +from sklearn.metrics import accuracy_score +from sklearn.model_selection import train_test_split +from torch import nn + +from concrete.ml.sklearn import NeuralNetClassifier + +use_gpu_if_available = False +device = "cuda" if use_gpu_if_available and check_gpu_available() else "cpu" + +# scikit-learn's fetch_openml method doesn't handle local cache: +# https://github.com/scikit-learn/scikit-learn/issues/18783#issuecomment-723471498 +# This is a workaround that prevents downloading the data every time the notebook is run +memory = Memory("./data/MNIST") +fetch_openml_cached = memory.cache(fetch_openml) + +# Fetch the MNIST data-set, with inputs already flattened +mnist_dataset = fetch_openml_cached("mnist_784") + +# Define max, mean and std values for the MNIST data-set +max_value = 255 +mean = 0.1307 +std = 0.3081 + +# Normalize the training data +data = (mnist_dataset.data) / max_value +data = ((data - mean) / std).round(decimals=4) + +# Concrete ML's NNs do not support: category, str, object types +# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/2990 +target = mnist_dataset.target.astype("int") + +test_size = 10000 +x_train, x_test, y_train, y_test = train_test_split( + data, target, test_size=test_size, random_state=0 +) + +def plot_samples(data, targets, n_samples=5, title="Train target"): + # MNIST images are originally of shape 28x28 with grayscale values + samples_to_plot = np.array(data)[:n_samples].reshape((n_samples, 28, 28)) + + fig = plt.figure(figsize=(30, 30)) + + for i in range(n_samples): 
subplot = fig.add_subplot(1, n_samples, i + 1) + subplot.set_title(f"{title}: {np.array(targets)[i]}", fontsize=15) + subplot.imshow(samples_to_plot[i], cmap="gray", interpolation="nearest") + +plot_samples(x_train, y_train) + +params = { + "module__n_layers": 2, + "module__n_w_bits": 4, + "module__n_a_bits": 4, + "module__n_hidden_neurons_multiplier": 0.5, + "module__activation_function": nn.ReLU, + "max_epochs": 7, +} + +model = NeuralNetClassifier(**params) + +model.fit(X=x_train, y=y_train); + +y_preds_clear = model.predict(x_test, fhe="disable") + +print(f"The test accuracy of the clear model is {accuracy_score(y_test, y_preds_clear):.2f}") + +# Reduce the input-set's length to make the compilation time faster +# The input-set should be large enough to be representative of the input data +inputset = x_train.head(1000) +simulated_fhe_circuit = model.compile(inputset, device=device) + +# Print the circuit's maximum bit-width reached during compilation +print(f"Circuit of {simulated_fhe_circuit.graph.maximum_integer_bit_width()}-bits (FHE simulation)") + +# Evaluate the model using simulation +y_preds_simulated = model.predict(x_test, fhe="simulate") + +print( + "The test accuracy (with FHE simulation) of the FHE model is " + f"{accuracy_score(y_test, y_preds_simulated):.2f}" +) + +# Print the circuit's maximum bit-width reached during compilation +print(f"FHE circuit of {model.fhe_circuit.graph.maximum_integer_bit_width()}-bits") + +time_begin = time.time() +model.fhe_circuit.client.keygen(force=True) +print(f"Key generation time: {time.time() - time_begin:.2f} seconds") + +# Reduce the test set +n_samples = 3 +x_test_sample = x_test.head(n_samples) +y_test_sample = y_test.head(n_samples) + +# Execute the predictions using FHE simulation on a few samples +simulated_fhe_predictions = model.predict(x_test_sample, fhe="simulate") + +time_begin = time.time() +fhe_predictions = model.predict(x_test_sample, fhe="execute") +seconds_per_sample = (time.time() - time_begin) / len(x_test_sample) +print(f"Execution time in FHE: {seconds_per_sample:.2f} seconds per sample\n") + +print("Expected values:", y_test_sample.tolist()) +print("Simulated prediction values:", simulated_fhe_predictions) +print("FHE prediction values:", fhe_predictions) + + + +# Code from: ./KNearestNeighbors.ipynb +-------------------------------------------------------------------------------- + +import time + +import pandas as pd +from sklearn.datasets import make_classification +from sklearn.metrics import accuracy_score +from sklearn.model_selection import train_test_split + +from concrete.ml.sklearn import KNeighborsClassifier as ConcreteKNeighborsClassifier + +X, y = make_classification( + n_samples=15, n_features=3, n_informative=3, n_redundant=0, n_classes=2, n_clusters_per_class=1 +) +# Split the data-set into a train and testing sets +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42) + +n_neighbors = 3 + +concrete_knn = ConcreteKNeighborsClassifier(n_bits=3, n_neighbors=n_neighbors) + +# Fit both the Concrete ML and its equivalent float estimator on clear data +concrete_knn, sklearn_model = concrete_knn.fit_benchmark(X_train, y_train) + +time_begin = time.time() +circuit = concrete_knn.compile(X) +print(f"Compilation time: {time.time() - time_begin:.2f} seconds") + +print(f"Maximum bit-width reached in the circuit: {circuit.graph.maximum_integer_bit_width()}") + +# For circuits exceeding 8-bits, the key generation process might take up to an hour +time_begin = time.time() 
+circuit.client.keygen() +print(f"Key generation time: {time.time() - time_begin:.2f} seconds") + +# a- Clear inference +pred_cml_clear = concrete_knn.predict(X_test, fhe="disable") +score_cml_clear = accuracy_score(y_test, pred_cml_clear) + +# b- FHE simulation inference +pred_cml_simulate = concrete_knn.predict(X_test, fhe="simulate") +score_cml_simulate = accuracy_score(y_test, pred_cml_simulate) + +# c- FHE inference +time_begin = time.time() +pred_cml_fhe = concrete_knn.predict(X_test, fhe="execute") +print(f"FHE inference execution time: {(time.time() - time_begin) / len(X_test):.2f}s per sample") +score_cml_fhe = accuracy_score(y_test, pred_cml_fhe) + +# scikit-learn inference +predict_sklearn = sklearn_model.predict(X_test) +score_sklearn = accuracy_score(y_test, predict_sklearn) + +print(f"Scikit-learn accuracy: {score_sklearn:.2%}") +print(f"Concrete ML (clear) accuracy: {score_cml_clear:.2%}") +print(f"Concrete ML (FHE simulation) accuracy: {score_cml_simulate:.2%}") +print(f"Concrete ML FHE accuracy: {score_cml_fhe:.2%}") + +# Retrieve topk labels for the scikit-learn model +distance, topk_args = sklearn_model.kneighbors(X_test) +topk_labels_sk = y_train[topk_args] + +# Retrieve topk labels for the concrete model +# The `get_topk_labels` method is like the `predict` method, but instead of returning the most common labels +# it provides the top K labels. +topk_labels_cml = concrete_knn.get_topk_labels(X_test, fhe="simulate") + +def highlight_diff(row): + """Custom style function to highlight mismatched predictions.""" + return [ + ( + "background-color: yellow" + if row["Majority vote (Concrete ML)"] != row["Majority vote (scikit-learn)"] + else "" + ) + ] * len(row) + + +df = pd.DataFrame( + { + "Distance": distance[:, 0], + f"Top{n_neighbors} (scikit-learn)": [list(row) for row in topk_labels_sk], + "Majority vote (scikit-learn)": predict_sklearn, + f"Top{n_neighbors} (Concrete ML)": [list(row) for row in topk_labels_cml], + "Majority vote (Concrete ML)": pred_cml_simulate, + "Ground truth": y_test, + } +) + +df.style.apply(highlight_diff, axis=1) + + + +# Code from: ./EncryptedPandas.ipynb +-------------------------------------------------------------------------------- + +import shutil +import time +from pathlib import Path +from tempfile import TemporaryDirectory + +import numpy +import pandas + +from concrete.ml.pandas import ClientEngine, load_encrypted_dataframe +from concrete.ml.pytest.utils import pandas_dataframe_are_equal + +numpy.random.seed(0) + +DATA_PATH = Path("data/encrypted_pandas") + +# pylint: disable=pointless-statement, consider-using-with + +CLIENT_1_DIR = DATA_PATH / "client_1" + +df_left = pandas.read_csv(CLIENT_1_DIR / "df_left.csv") + +df_left + +schema = {"index": {index_value: i + 1 for i, index_value in enumerate(df_left["index"].values)}} + +client_1_temp_dir = TemporaryDirectory(dir=str(CLIENT_1_DIR)) +client_1_temp_path = Path(client_1_temp_dir.name) + +# Define the directory where to store the serialized keys +client_1_keys_path = client_1_temp_path / "keys" + +client_1 = ClientEngine(keys_path=client_1_keys_path) + +df_left_enc = client_1.encrypt_from_pandas(df_left, schema=schema) + +df_left_enc.get_schema() + +df_left_enc_path = client_1_temp_path / "df_left_enc" +df_left_enc.save(df_left_enc_path) + +CLIENT_2_DIR = DATA_PATH / "client_2" + +df_right = pandas.read_csv(CLIENT_2_DIR / "df_right.csv") + +df_right + +client_2_temp_dir = TemporaryDirectory(dir=str(CLIENT_2_DIR)) +client_2_temp_path = Path(client_2_temp_dir.name) + +# Define the directory 
where to store the serialized keys +client_2_keys_path = client_2_temp_path / "keys" + +# Copy the first user's keys +shutil.copy2(client_1_keys_path, client_2_keys_path) + +client_2 = ClientEngine(keys_path=client_2_keys_path) + +df_right_enc = client_2.encrypt_from_pandas(df_right, schema=schema) + +df_right_enc + +df_right_enc_path = client_2_temp_path / "df_right_enc" +df_right_enc.save(df_right_enc_path) + +df_left_enc = load_encrypted_dataframe(df_left_enc_path) +df_right_enc = load_encrypted_dataframe(df_right_enc_path) + +start = time.time() +df_joined_enc_server = df_left_enc.merge(df_right_enc, how="left", on="index") +end = time.time() - start + +print(f"Total execution time: {end:.2f}s") + +df_joined_enc_server_path = client_1_temp_path / "df_joined_enc" + +df_joined_enc_server.save(df_joined_enc_server_path) + +df_joined_enc = load_encrypted_dataframe(df_joined_enc_server_path) + +df_joined_cml = client_1.decrypt_to_pandas(df_joined_enc) + +df_joined_cml + +df_joined_pandas = pandas.merge(df_left, df_right, how="left", on="index") + +df_joined_pandas + +df_are_equal = pandas_dataframe_are_equal( + df_joined_pandas, df_joined_cml, float_rtol=0.1, equal_nan=True +) + +print("Concrete ML data-frame is equal to Pandas data-frame:", df_are_equal, "\n") + +# Clean the temporary directories and their content +client_1_temp_dir.cleanup() +client_2_temp_dir.cleanup() + + + +# Code from: ./XGBRegressor.ipynb +-------------------------------------------------------------------------------- + +# pylint: disable=too-many-lines,invalid-name +import warnings + +# For warnings in xgboost.sklearn +warnings.simplefilter(action="ignore", category=FutureWarning) + +import time + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns +from sklearn import metrics, preprocessing +from sklearn.datasets import fetch_openml +from sklearn.model_selection import GridSearchCV, train_test_split +from xgboost.sklearn import XGBRegressor as SklearnXGBRegressor + +from concrete.ml.sklearn import XGBRegressor as ConcreteXGBRegressor + +# Fetch the ames_housing data-set from openml using its data_id. +df, y = fetch_openml(data_id=43926, return_X_y=True) + +# Add the target column to the other features. +df.insert(0, y.name, y) +print(f"Shape: {df.shape}") + +df.head(2) + +# Check the target distribution. +sns.set(rc={"figure.figsize": (6, 3)}) +sns.distplot(y); + +# Pick the numerical features of our data-set. +float_columns = df.select_dtypes(include=[np.number]).columns +categorical_columns = df.select_dtypes(include="category").columns + +print( + f"{len(float_columns)} numerical features vs {len(categorical_columns)} categorical features." +) + +df_encoded = df.copy() + +# Encode target labels with value between 0 and n_classes-1. +le = preprocessing.LabelEncoder() +# Convert the above categorical features into numeric type. +df_encoded[categorical_columns] = df[categorical_columns].apply(le.fit_transform) + +# Before data transformation: +df[categorical_columns].head(2) + +# After data transformation: +df_encoded[categorical_columns].head(2) + +# Removing the target column from the dataframe. +X = df_encoded.drop(columns=y.name, axis=1, inplace=False) + +X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.1, shuffle=True, random_state=24 +) + +# Common hyper-parameters for both scikit-learn and Concrete ML. +n_estimators = 50 +max_depth = 4 +n_jobs = 1 + +# 1. Instantiation of the model. 
+xgboost_reg = SklearnXGBRegressor(n_estimators=n_estimators, max_depth=max_depth, n_jobs=n_jobs) + +# 2. Train the model. +xgboost_reg.fit(X_train, y_train) + +# 3. XGBoost (fp32) predictions: +y_preds_XGBoost = xgboost_reg.predict(X_test) + +# 4. Evaluation with the r2_score metric. +print(f"R2_score with XGBoost: {metrics.r2_score(y_test, y_preds_XGBoost):.2f}") + +# 1. Instantiation of the model; the only difference is the `n_bits` hyper-parameter. +# We quantize the inputs and the weights using `n_bits` because FHE operates only over integers. + +# n_bits is necessary for the XGBoost of Concrete ML. +n_bits = 5 + +concrete_reg = ConcreteXGBRegressor( + n_bits=n_bits, n_estimators=n_estimators, max_depth=max_depth, n_jobs=n_jobs +) + +# 2. We train the concrete XGBoost model on clear data. +concrete_reg.fit(X_train, y_train) + +# 3. Compilation: +# The quantized model is compiled to an FHE equivalent. To do so, the compiler requires +# an exhaustive set of data, mainly to evaluate the maximum integer bit-width within the graph, +# needed during the FHE computations. +circuit = concrete_reg.compile(X_train) +print(f"Generating a key for an {circuit.graph.maximum_integer_bit_width()}-bit circuit") + +# Key generation: +# The compiler returns a circuit, which can then be used for the key generation. +time_begin = time.time() +circuit.client.keygen(force=False) +print(f"Key generation time: {(time.time() - time_begin):.3f} sec") + +width = 0.2 +n_features = 5 # or X.shape[1] to get all importance scores of each attribute +feature_names = X.columns + +# Retrieving the feature importances. +concrete_feature_importances = concrete_reg.sklearn_model.feature_importances_ +xgboost_feature_importances = xgboost_reg.feature_importances_ + +# Sorting the feature_importances_ according to the XGBoost results. +index_importances = [[i, fi] for i, fi in enumerate(xgboost_feature_importances)] +index_importances_sorted = np.array( + sorted(index_importances, key=lambda key: key[1], reverse=True) +)[:, 0] + +# Displaying the 5 highest feature importances. +index_importances_sorted = list(map(int, index_importances_sorted))[:n_features] + +_, ax = plt.subplots(figsize=(10, 2)) + +ax.barh( + range(n_features), + xgboost_feature_importances[index_importances_sorted], + width, + color="red", + label="XGBoost", +) +ax.barh( + np.arange(n_features) + width, + concrete_feature_importances[index_importances_sorted], + width, + color="green", + label="XGBoost - Concrete ML", +) + +ax.set_title("Feature importances according to Concrete ML model and scikit-learn model", size=12) +ax.set_yticks(range(n_features), feature_names[index_importances_sorted][:n_features]) +ax.set_xlabel("Feature importances") +ax.set_ylabel("Feature names") + +ax.legend(loc="best") +plt.show() + +# 4.1 Inference on clear quantized data (fhe="disable"); the execution is very fast. +y_preds_non_fhe = concrete_reg.predict(X_test, fhe="disable") + +def plot_predictions(y_true: np.array, y_preds: dict, colors: list) -> None: + # For a better visualization, we sort the predictions and the ground truth. 
+ y_true = np.array(y_true) + idx = np.argsort(y_true) + y_true_sorted = y_true[idx] + + for title, y_pred in y_preds.items(): + y_preds[title] = y_pred[idx].flatten() + + ncols, nrows = len(y_preds), 1 + + fig, axes = plt.subplots(nrows, ncols, figsize=(15, 5)) + + for i, ((title, y_pred), c) in enumerate(zip(y_preds.items(), colors)): + axes[i].scatter(np.arange(len(y_true_sorted)), y_true_sorted, c="r") + axes[i].scatter(np.arange(len(y_true_sorted)), y_pred, c=c, alpha=0.5) + axes[i].set_xlabel(title, labelpad=5) + axes[i].set_ylabel("Sale_Prices ($)") + # Hide x ticks, because it just refers to indexes. + axes[i].get_xaxis().set_ticks([]) + + # Set the spacing between subplots. + fig.tight_layout() + +plot_predictions( + y_test, + y_preds={"XGBoost": y_preds_XGBoost, "Quant. XGBoost": y_preds_non_fhe}, + colors=["g", "b"], +) + +print(f"R2_score with XGBoost: {metrics.r2_score(y_test, y_preds_XGBoost):.4f}") +print( + f"R2_score in FHE simulation (not encrypted): {metrics.r2_score(y_test, y_preds_non_fhe):.4f}" +) + +n_folds = 5 +param_grid = { + "n_bits": [2, 3, 4, 5, 6, 7], + "max_depth": [4], + "n_estimators": [10, 20, 50, 100], +} + +grid_search_concrete = GridSearchCV(ConcreteXGBRegressor(), param_grid, cv=n_folds, n_jobs=1) +grid_search_concrete.fit(X_train, y_train); + +results = pd.DataFrame(grid_search_concrete.cv_results_) + +print(f"Best score : {grid_search_concrete.best_score_:.3f}") +print(f"Best params: {grid_search_concrete.best_params_}") + +def lineplot(df, yaxis, ylabel, title, group_keys: str = "param_n_estimators"): + params = [ + {"color": "red", "linewidth": 1}, + {"color": "green", "marker": "x", "markersize": 5, "linewidth": 1}, + {"color": "magenta", "marker": "s", "markersize": 5, "dashes": (3, 20)}, + {"color": "blue", "marker": "^", "markersize": 5, "dashes": (3, 10)}, + {"color": "gold", "marker": "*", "markersize": 5, "dashes": (3, 40)}, + {"color": "black", "linestyle": "dashed", "dashes": (3, 10)}, + ] + + plt.figure(figsize=(15, 4)) + + for (key, grp), param in zip(df.groupby([group_keys]), params): + plt.plot(grp["param_n_bits"], grp[yaxis], **param, label=f"estimators_{key}") + + plt.title(title) + plt.ylabel(ylabel) + plt.xlabel("$n_{bits}$") + plt.legend(loc="best") + plt.ylim(0, 1) + plt.minorticks_on() + plt.show() + +lineplot( + df=results, + yaxis="mean_test_score", + ylabel="$r^2_{score}$", + title="$r^2_{score}$ given n_estimators and n_bits", +) + +best_params_xgboost = {"n_estimators": 50, "n_bits": 5} + +# Train the concrete xgboost with the best combination of parameters. +concrete_reg = ConcreteXGBRegressor(**best_params_xgboost, n_jobs=1) + +concrete_reg.fit(X_train, y_train) + +from concrete.compiler import check_gpu_available + +use_gpu_if_available = False +device = "cuda" if use_gpu_if_available and check_gpu_available() else "cpu" + +# Compile the model using the training data. 
+circuit = concrete_reg.compile(X_train, device=device) + +# Get the equivalent predictions in clear quantized data: +y_preds_clear = concrete_reg.predict(X_test, fhe="disable") + +# Perform the inference in FHE (simulation): +y_preds_simulated = concrete_reg.predict(X_test, fhe="simulate") + +plot_predictions( + y_test, + y_preds={ + "XGBoost": y_preds_XGBoost, + "Concrete ML without FHE": y_preds_clear, + "Concrete ML with FHE (simulation)": y_preds_simulated, + }, + colors=["g", "b", "m"], +) + +# Test in FHE on a smaller test set +FHE_SAMPLE = 20 +X_test_fhe = X_test[:FHE_SAMPLE] +y_test_fhe = y_test[:FHE_SAMPLE] + +# Perform the inference in FHE: +time_begin = time.time() +y_preds_fhe = concrete_reg.predict(X_test_fhe, fhe="execute") +print(f"FHE runtime per sample: {(time.time() - time_begin) / len(X_test_fhe):.2f} sec") + +# Evaluation + +r2_score_sklearn = metrics.r2_score(y_test, y_preds_XGBoost) +r2_score_clear_concrete = metrics.r2_score(y_test, y_preds_clear) +r2_score_simulated_concrete = metrics.r2_score(y_test, y_preds_simulated) +r2_score_fhe_concrete = metrics.r2_score(y_test_fhe, y_preds_fhe) + +print(f"R2_score with XGBoost : {r2_score_sklearn:.4f}") +print(f"R2_score without FHE : {r2_score_clear_concrete:.4f}") +print(f"R2_score with FHE (simulation) : {r2_score_simulated_concrete:.4f}") +print(f"R2_score with FHE : {r2_score_fhe_concrete:.4f}") + + + +# Code from: ./ExperimentPrivacyTreePaper.ipynb +-------------------------------------------------------------------------------- + +# Importing necessary libraries and modules + +import time + +import numpy as np +from IPython.display import display +from onnx import numpy_helper +from sklearn.datasets import fetch_openml +from sklearn.metrics import ( + accuracy_score, + average_precision_score, + f1_score, + precision_score, + recall_score, +) +from sklearn.model_selection import RepeatedKFold +from sklearn.preprocessing import LabelBinarizer, OrdinalEncoder + +from concrete.ml.sklearn import DecisionTreeClassifier, RandomForestClassifier, XGBClassifier + + +def basic_preprocessing(df, target_column): + """ + Convert categorical columns to their corresponding code values + and binarize the target column. + + Parameters: + df (pandas.DataFrame): Input dataframe to preprocess. + target_column (str): Name of the target column to be binarized. + + Returns: + pandas.DataFrame: Preprocessed dataframe. + """ + + for col in df.columns: + if df[col].dtype == "object": + df[col] = df[col].astype("category") + df[col] = df[col].cat.codes + elif df[col].dtype == "category": + df[col] = df[col].cat.codes + df[target_column] = LabelBinarizer().fit_transform(df[target_column]) + + return df + +# Set up dataset names and their respective IDs for fetching from OpenML +dataset_names = { + "spambase": 44, + "wine": None, + "heart-h": 1565, + "wdbc": 1510, + "adult": None, + "steel": 1504, +} + +datasets = {} + + +def load_dataset(name, data_id=None): + """Load dataset from OpenML by name or by ID. + + Args: + name (str): Name of the dataset. + data_id (int, optional): The ID of the dataset on OpenML. + If provided, the dataset is loaded by ID. + + Returns: + X (np.array): Features of the dataset. + y (np.array): Target labels of the dataset. 
+ """ + if data_id is not None: + X, y = fetch_openml(data_id=data_id, as_frame=False, cache=True, return_X_y=True) + else: + X, y = fetch_openml(name=name, as_frame=False, cache=True, return_X_y=True) + return X, y + + +for ds_name, ds_id in dataset_names.items(): + print(f"Loading {ds_name}") + + X, y = load_dataset(ds_name, ds_id) + + # Remove rows with NaN values + not_nan_idx = np.where(~np.isnan(X).any(axis=1)) + X = X[not_nan_idx] + y = y[not_nan_idx] + + # Convert non-integer target labels to integers + if not y.dtype == np.int64: + encoder = OrdinalEncoder() + y = encoder.fit_transform(y.reshape(-1, 1)).astype(np.int32).squeeze() + + datasets[ds_name] = {"X": X, "y": y} + +# Setting a random seed for reproducibility across all models and operations +random_seed = 42 + +# Models with their hyper-parameters +model_hyperparameters = { + DecisionTreeClassifier: {"max_depth": 5, "random_state": random_seed}, + XGBClassifier: {"max_depth": 3, "n_estimators": 50, "random_state": random_seed}, + RandomForestClassifier: {"n_estimators": 50, "random_state": random_seed}, +} + +decision_tree_comparison_params = { + "spam": {"max_leaf_nodes": 58, "max_depth": 17}, + "heart-h": {"max_leaf_nodes": 5, "max_depth": 3}, + "steel": {"max_leaf_nodes": None, "max_depth": 5}, + "wdbc": {"max_leaf_nodes": None, "max_depth": 10}, +} + +# List of bit-width used for quantization +n_bits_list = list(range(1, 10)) + +def analyze_gemm_computation(concrete_classifier): + """Analyze the GEMM (General Matrix Multiply) operations in the given ONNX model. + + Args: + concrete_classifier (object): Classifier that contains an ONNX model representation. + x_train (np.array): Training dataset. + + Returns: + tuple: Shapes of the matrices involved in GEMM operations. + """ + + # Extract weights and biases from the ONNX model graph + quant_params = { + onnx_init.name: numpy_helper.to_array(onnx_init) + for onnx_init in concrete_classifier.onnx_model.graph.initializer + if "weight" in onnx_init.name or "bias" in onnx_init.name + } + + # Extract the shapes of matrices used in GEMM operations + matrix_shapes = [] + for i in range(1, 4): + key = [key for key in quant_params.keys() if f"_{i}" in key and "weight" in key][0] + matrix_shapes.append(quant_params[key].shape) + + return tuple(matrix_shapes) + +def benchmark_model(X, y, model, model_params, n_bits, rkf): + """Benchmark a given model and return its evaluation scores.""" + scores = { + "precision": [], + "recall": [], + "accuracy": [], + "f1": [], + "average_precision": [], + "nodes": None, + } + scores_fp32 = {"precision": [], "recall": [], "accuracy": [], "f1": [], "average_precision": []} + + metric_func_to_key = { + "precision_score": "precision", + "recall_score": "recall", + "f1_score": "f1", + "average_precision_score": "average_precision", + } + + for train_index, test_index in rkf.split(X): + X_train, X_test = X[train_index], X[test_index] + y_train, y_test = y[train_index], y[test_index] + + concrete_model, sklearn_model = model(n_bits=n_bits, **model_params).fit_benchmark( + X_train, y_train + ) + + y_pred = concrete_model.predict(X_test) + if len(set(y_test)) == 2: + for metric_func in [precision_score, recall_score, average_precision_score, f1_score]: + scores_key = metric_func_to_key[metric_func.__name__] + scores[scores_key].append(metric_func(y_test, y_pred)) + scores["accuracy"].append(accuracy_score(y_test, y_pred)) + + y_pred_fp32 = sklearn_model.predict(X_test) + if len(set(y_test)) == 2: + for metric_func in [precision_score, recall_score, 
average_precision_score, f1_score]: + scores_key = metric_func_to_key[metric_func.__name__] + scores_fp32[scores_key].append(metric_func(y_test, y_pred_fp32)) + scores_fp32["accuracy"].append(accuracy_score(y_test, y_pred_fp32)) + + shapes = analyze_gemm_computation(concrete_model) + scores["nodes"] = shapes[0][0] + + # Calculate inference time + concrete_model.compile(X_train) + concrete_model.fhe_circuit.keygen(force=False) + + start = time.time() + concrete_model.predict(X_test[:1], fhe="execute") + end = time.time() + scores["inference_time"] = end - start + + start = time.time() + concrete_model.predict(X_test[:1]) + end = time.time() + scores_fp32["inference_time"] = end - start + + return scores, scores_fp32 + + +n_bits = 6 +scores_global = {} + +rkf = RepeatedKFold(n_splits=5, n_repeats=3, random_state=0) + +for dataset_name, dataset_data in datasets.items(): + X, y = dataset_data["X"].astype(np.float32), dataset_data["y"] + assert len(set(y)) >= 2 + if y.dtype not in [np.int32, bool]: + print(f"Unexpected datatype for y in dataset {dataset_name}: {y.dtype}") + + key_dataset = f"{dataset_name} (#features: {X.shape[1]})" + scores_global[key_dataset] = {} + + for cls, model_params in model_hyperparameters.items(): + scores, scores_fp32 = benchmark_model(X, y, cls, model_params, n_bits, rkf) + + scores_global[key_dataset][cls.__name__ + "_concrete"] = scores + scores_global[key_dataset][cls.__name__ + "_fp32"] = scores_fp32 + + print( + f"{cls.__name__} on {key_dataset} -> Acc: {np.mean(scores['accuracy']):.4f}, " + f"Acc (fp32): {np.mean(scores_fp32['accuracy']):.4f}, " + f"FHE inference time: {scores['inference_time']:.2f}s" + ) + +import math + +import pandas as pd + +df = pd.DataFrame.from_dict( + {(i, j): value for i, scores in scores_global.items() for j, value in scores.items()}, + orient="index", +) + + +df["FHE/Clear ratio"] = (df["inference_time"] / df["inference_time"].shift(-1)).apply( + lambda x: "" if (x < 1) or (math.isnan(x)) else str(int(round(x, 0))) + "x" +) + + +def format_scores(val): + if isinstance(val, list): + if not val: + return "-" + return f"{np.mean(val) * 100:.1f}\\% ± {np.std(val) * 100:.1f}\\%" + + if pd.isna(val): + return "-" + + if isinstance(val, (float, int)): + # To ensure all floating point values are treated as percentages + return f"{val:.3f}" + + if "x" in str(val): # Ensure that val is treated as a string + return val + + return "-" + + +df = df.applymap(format_scores) + +# Renaming for display +model_names = { + "DecisionTreeClassifier_concrete": "FHE-DT", + "DecisionTreeClassifier_fp32": "FP32-DT", + "XGBClassifier_concrete": "FHE-XGB", + "XGBClassifier_fp32": "FP32-XGB", + "RandomForestClassifier_concrete": "FHE-RF", + "RandomForestClassifier_fp32": "FP32-RF", +} + +for original, renamed in model_names.items(): + df.index = df.index.set_levels(df.index.levels[1].str.replace(original, renamed), level=1) + +df.columns = df.columns.str.replace("average_precision", "AP") + +# Reordering Columns +columns_order = [col for col in df if col not in ["FHE/Clear ratio", "inference_time"]] + [ + "inference_time", + "FHE/Clear ratio", +] +df = df[columns_order] + +# Drop and rename columns +df.columns = df.columns.str.replace("inference_time", "Time (s)") +df.drop(columns=["precision", "recall"], inplace=True) + +# Adjust LaTeX output +latex_code = df.to_latex(multirow=True, escape=False, column_format="l|l|l|l|l|l|l|l") + +latex_code = latex_code.replace("#", "\\#") +display(df) + +def evaluate_model(X, y, model, rkf): + """Evaluate a given model and 
return its scores.""" + scores = {"precision": [], "recall": [], "accuracy": [], "f1": [], "average_precision": []} + scores_fp32 = {"precision": [], "recall": [], "accuracy": [], "f1": [], "average_precision": []} + + metric_func_to_key = { + "precision_score": "precision", + "recall_score": "recall", + "f1_score": "f1", + "average_precision_score": "average_precision", + } + + for train_index, test_index in rkf.split(X): + X_train, X_test = X[train_index], X[test_index] + y_train, y_test = y[train_index], y[test_index] + + concrete_model, sklearn_model = model.fit_benchmark(X_train, y_train) + + for model_instance, score_dict in [(concrete_model, scores), (sklearn_model, scores_fp32)]: + y_pred = model_instance.predict(X_test) + for metric_func in [precision_score, recall_score, average_precision_score, f1_score]: + score_key = metric_func_to_key[metric_func.__name__] + score_dict[score_key].append(metric_func(y_test, y_pred)) + score_dict["accuracy"].append(accuracy_score(y_test, y_pred)) + + return scores, scores_fp32 + + +rkf = RepeatedKFold(n_splits=5, n_repeats=3, random_state=0) +X, y = datasets["spambase"]["X"].astype(np.float32), datasets["spambase"]["y"] +assert len(set(y)) == 2 +if y.dtype not in [np.int32, bool]: + print(f"Unexpected datatype for y in dataset spambase: {y.dtype}") + +scores_global = {} + +for n_bits in n_bits_list: + scores_global[n_bits] = {} + + for model_cls, params in model_hyperparameters.items(): + model_instance = model_cls(n_bits=n_bits, **params) + scores, scores_fp32 = evaluate_model(X, y, model_instance, rkf) + + model_name = model_cls.__name__ + scores_global[n_bits][model_name + "_concrete"] = scores + scores_global[n_bits][model_name + "_fp32"] = scores_fp32 + + print(f"{model_name} with {n_bits}-bits:") + print("Average precision:", np.mean(scores["average_precision"])) + print("Average precision (fp32):", np.mean(scores_fp32["average_precision"])) + +import matplotlib.pyplot as plt +from tqdm import tqdm + + +def evaluate_model_on_error_rates(X_train, X_test, y_test, concrete_model, p_error_list): + """Evaluate the concrete model on different error rates and return accuracy and time taken.""" + acc_scores = [] + time_scores = [] + real_p_error_list = [] + + for p_error in tqdm(p_error_list): + concrete_model.compile(X_train, p_error=p_error) + real_p_error_list.append(concrete_model.fhe_circuit.p_error) + concrete_model.fhe_circuit.keygen(force=False) + + start_time = time.time() + y_pred = concrete_model.predict(X_test, fhe="execute") + end_time = time.time() + + acc_scores.append(accuracy_score(y_pred, y_test)) + time_scores.append(end_time - start_time) + + return acc_scores, time_scores, real_p_error_list + + +plt.rcParams.update({"font.size": 16}) +n_bits = 6 +p_error_list = [2e-40, 1e-6, 1e-5, 1e-4, 0.001, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95] +X, y = datasets["spambase"]["X"].astype(np.float32), datasets["spambase"]["y"] + +clf = DecisionTreeClassifier(n_bits=n_bits, **model_hyperparameters[DecisionTreeClassifier]) +rkf = RepeatedKFold(n_splits=20, n_repeats=3, random_state=0) + +for train_index, test_index in rkf.split(X): + X_train, X_test = X[train_index], X[test_index] + y_train, y_test = y[train_index], y[test_index] + + concrete_model, _ = clf.fit_benchmark(X_train, y_train) + + # Calculating num_nodes using analyze_gemm_computation function + shapes = analyze_gemm_computation(concrete_model) + num_nodes = shapes[0][0] + + acc_scores, time_p_error, real_p_error_list = evaluate_model_on_error_rates( + X_train, X_test, 
y_test, concrete_model, p_error_list + ) + break + +def plot_metrics_vs_error_rates( + metric_values, model_name, num_nodes, xlabel, ylabel, filename, red_line_value +): + """Plot the metrics against error rates.""" + plt.figure() + plt.plot( + [real_p_error_list[0], real_p_error_list[-1]], + [red_line_value, red_line_value], + color="red", + linewidth=2, + label="p_error=2E-40", + ) + plt.plot(real_p_error_list, metric_values, color="blue", linewidth=2, marker="x") + plt.grid(True) + plt.legend() + plt.title(f"{model_name} {num_nodes} nodes") + plt.xlabel(xlabel) + plt.ylabel(ylabel) + plt.semilogx() + plt.xticks(10.0 ** np.arange(-6, 1)) + plt.savefig(filename, bbox_inches="tight", dpi=300) + plt.show() + + +# Plotting accuracy vs error rates +plot_metrics_vs_error_rates( + acc_scores, + "DecisionTreeClassifier", + num_nodes, + "$p_{error}$", + "Metric", + "DecisionTreeClassifier" + "acc_p_error.eps", + 0.91, +) + +# Plotting execution time per data point vs error rates +plot_metrics_vs_error_rates( + np.asarray(time_p_error) / X_test.shape[0], + "DecisionTreeClassifier", + num_nodes, + "$p_{error}$", + "Execution time", + "DecisionTreeClassifier" + "speed_p_error.eps", + 1.807, +) + +# Plot the metrics vs n_bits for each model +plt.rcParams.update({"font.size": 16}) +for cls in model_hyperparameters: + plt.figure() + + f1_scores = [] + f1_scores_fp32 = [] + + average_precision_scores = [] + average_precision_scores_fp32 = [] + + for n_bits in n_bits_list: + average_precision_scores.append( + np.mean(scores_global[n_bits][cls.__name__ + "_concrete"]["average_precision"]) + ) + average_precision_scores_fp32.append( + np.mean(scores_global[n_bits][cls.__name__ + "_fp32"]["average_precision"]) + ) + + f1_scores.append(np.mean(scores_global[n_bits][cls.__name__ + "_concrete"]["f1"])) + f1_scores_fp32.append(np.mean(scores_global[n_bits][cls.__name__ + "_fp32"]["f1"])) + + # plt.legend() + ap_relative = np.array(average_precision_scores) / average_precision_scores_fp32 + f1_relative = np.array(f1_scores) / f1_scores_fp32 + print(f"ap relative: {ap_relative}, f1_relative: {f1_relative}") + plt.plot( + n_bits_list, + average_precision_scores, + label="concrete_average_precision", + color="blue", + linewidth=2, + ) + plt.plot( + n_bits_list, + average_precision_scores_fp32, + label="fp32_average_precision", + color="blue", + linewidth=2, + linestyle="dashed", + ) + + plt.plot(n_bits_list, f1_scores, label="concrete_f1", linewidth=2, color="red") + plt.plot( + n_bits_list, f1_scores_fp32, label="fp32_f1", color="red", linewidth=2, linestyle="dashed" + ) + + plt.grid(True) + plt.xlim([1, 9]) + plt.ylim([0, 1]) + plt.xticks(np.arange(1, 10)) + plt.legend() + + plt.title(cls.__name__) + plt.xlabel("Bitwidth") + plt.ylabel("Metric") + # Save the figure + plt.savefig(cls.__name__ + ".eps", bbox_inches="tight", dpi=300) + + plt.show() + +def predict_with_fhe(clf, X_sample): + """Predict using FHE and return elapsed time.""" + print("Compiling and keygen...") + clf.compile(X_sample[:100]) + clf.fhe_circuit.keygen(force=False) + + print("Predict in FHE") + start_time = time.time() + _ = clf.predict(X_sample[:1], fhe="execute") + end_time = time.time() + + return end_time - start_time + + +def analyze_and_store(clf, X_sample, nodes_dict, scores_dict): + """Analyze the model and store results.""" + elapsed_time = predict_with_fhe(clf, X_sample) + + model_name = clf.__class__.__name__ + if model_name not in nodes_dict: + nodes_dict[model_name] = [] + scores_dict[model_name] = [] + + 
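# Record the wall-clock time of the single-sample encrypted inference measured above +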
scores_dict[model_name].append(elapsed_time) + + shapes = analyze_gemm_computation(clf) + nodes_dict[model_name].append(shapes[0][0]) + + print(clf.n_bits) + print(scores_dict[model_name][-1]) + print(nodes_dict[model_name][-1]) + + +X, y = datasets["spambase"]["X"], datasets["spambase"]["y"] +nodes_dict = {} +scores_dict = {} + +for model_name, hyperparameters in model_hyperparameters.items(): + for n_bits in n_bits_list: + clf = model_name(n_bits=n_bits, **hyperparameters) + clf.fit(X, y) + + if n_bits < 9: + analyze_and_store(clf, X, nodes_dict, scores_dict) + +def plot_fhe_inference_time(n_bits_list, scores, model_hyperparameters): + """Plot the FHE inference time against bitwidth for each model.""" + + # Calculate average inference time per node for each bitwidth + n_bits_timings = np.zeros((8,)) + for model in model_hyperparameters: + for idx, n_bits in enumerate(n_bits_list): + if n_bits < 9: + n_bits_timings[idx] += ( + scores[model.__name__][idx] / nodes_dict[model.__name__][idx] * 1000 + ) + n_bits_timings /= len(model_hyperparameters) + + # Plot setup + plt.figure(figsize=(10, 6)) + plt.rcParams.update({"font.size": 16}) + + plt.plot( + range(1, 9), + n_bits_timings, + label="FHE Inference Time", + color="blue", + linewidth=2, + marker="o", + ) + + plt.xlabel("Bitwidth") + plt.ylabel("Time (ms)") + plt.grid(True, which="both") + plt.semilogy() + plt.ylim([0, 1000]) + plt.xlim([0.5, 8.5]) + plt.xticks(np.arange(1, 9)) + plt.title("FHE Execution vs Precision", pad=10) + + plt.savefig("fhe_inference_time.eps", bbox_inches="tight", dpi=300) + plt.show() + + +plot_fhe_inference_time(n_bits_list, scores_dict, model_hyperparameters) + + + +# Code from: ./SVMClassifier.ipynb +-------------------------------------------------------------------------------- + +# display visualizations and plots in the notebook itself +%matplotlib inline + +# import numpy and matplotlib +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from sklearn.decomposition import PCA +from sklearn.metrics import accuracy_score, f1_score, make_scorer +from sklearn.model_selection import GridSearchCV, train_test_split +from sklearn.preprocessing import StandardScaler +from sklearn.svm import LinearSVC as SklearnLinearSVC + +# import the concrete-ml LinearSVC implementation +from concrete.ml.sklearn.svm import LinearSVC as ConcreteLinearSVC + +def plot_decision_boundary( + clf, + X, + y, + title="LinearSVC Decision Boundary", + xlabel="First Principal Component", + ylabel="Second Principal Component", +): + # Perform PCA to reduce the dimensionality to 2 + pca = PCA(n_components=2) + X_pca = pca.fit_transform(X) + + # Create the mesh grid + x_min, x_max = X_pca[:, 0].min() - 1, X_pca[:, 0].max() + 1 + y_min, y_max = X_pca[:, 1].min() - 1, X_pca[:, 1].max() + 1 + xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02), np.arange(y_min, y_max, 0.02)) + + # Transform the mesh grid points back to the original feature space + mesh_points = pca.inverse_transform(np.c_[xx.ravel(), yy.ravel()]) + + # Make predictions using the classifier + Z = clf.predict(mesh_points) + Z = Z.reshape(xx.shape) + + # Plot the decision boundary + _, ax = plt.subplots() + ax.contourf(xx, yy, Z, alpha=0.8) + ax.scatter(X_pca[:, 0], X_pca[:, 1], c=y, edgecolors="k", marker="o", s=50) + + # Calculate the accuracy + accuracy = accuracy_score(y, clf.predict(X)) + + plt.xlabel(xlabel) + plt.ylabel(ylabel) + plt.title(f"{title} (Accuracy: {accuracy:.4f})") + plt.show() + +# Get the data +df = pd.read_csv( + 
"https://gist.githubusercontent.com/robinstraub/72f1cb27829dba85f49f68210979f561/" + "raw/b9982ae654967028f6f4010bd235d850d38fe25b/pulsar-star-dataset.csv" +) +df.head() + +# Extract the features and labels +X = df.drop(columns=["target_class"]) +y = df["target_class"] + +# Replace N/A values with the mean of the respective feature +X.fillna(X.mean(), inplace=True) + +# Split the data into train and test sets +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + +# Scale the data +scaler = StandardScaler() +X_train = scaler.fit_transform(X_train) +X_test = scaler.transform(X_test) + +# Convert the floating labels to integer labels for both train and test sets +y_train = y_train.astype(int) +y_test = y_test.astype(int) + +# Train a model with scikit-learn LinearSVC, perform prediction and compute the accuracy +svm_sklearn = SklearnLinearSVC(max_iter=100) +svm_sklearn.fit(X_train, y_train) +# plot the boundary +plot_decision_boundary(svm_sklearn, X_test, y_test) + +# Perform the same steps with the Concrete ML LinearSVC implementation +svm_concrete = ConcreteLinearSVC(max_iter=100, n_bits=8) +svm_concrete.fit(X_train, y_train) +# plot the boundary +plot_decision_boundary(svm_concrete, X_test, y_test) + +# A circuit needs to be compiled to enable FHE execution +circuit = svm_concrete.compile(X_train) +# Now that a circuit is compiled, the svm_concrete can predict value with FHE +y_pred = svm_concrete.predict(X_test, fhe="execute") +accuracy = accuracy_score(y_test, y_pred) +# print the accuracy +print(f"FHE Accuracy: {accuracy:.4f} (bit-width: {circuit.graph.maximum_integer_bit_width()})") + +# setup and train a scikit-learn LinearSVC model, just as before +svm_sklearn = SklearnLinearSVC() +svm_sklearn.fit(X_train, y_train) +# predict some test data and measure the model accuracy +y_pred_sklearn = svm_sklearn.predict(X_test) +accuracy_sklearn = accuracy_score(y_test, y_pred_sklearn) + +print(f"Scikit-learn Accuracy: {accuracy_sklearn:.4f}") + +svm = ConcreteLinearSVC() + +# Define the parameter grid for the grid search +param_grid = param_grid = [ + { + "C": np.logspace(-3, 3, 7), + "n_bits": range(2, 17), + "penalty": ["l1", "l2"], + "dual": [False, True], + }, +] + +# Use the F1 score as the metric to optimize, as it provides a +# balanced trade-off between precision and recall +scorer = make_scorer(f1_score, average="weighted") + +# Set up the grid search with the custom scoring function +grid_search = GridSearchCV(svm, param_grid, scoring=scorer, cv=5, n_jobs=1) + +# Fit the grid search to the data +grid_search.fit(X_train, y_train) + +# Convert the grid search results into a pandas DataFrame +results_df = pd.DataFrame(grid_search.cv_results_) + +# Define a custom function to highlight a specific row based on n_bits value + + +def highlight_row(row, n_bits_value=3, color="green"): + return [ + f"background-color: {color}" if row["param_n_bits"] == n_bits_value else "" for _ in row + ] + + +# Find the best hyperparameter combination for each n_bits value +best_results = results_df.loc[results_df.groupby("param_n_bits")["mean_test_score"].idxmax()] +best_results = best_results[ + ["param_n_bits", "param_C", "param_penalty", "param_dual", "mean_test_score"] +] +best_results.reset_index(drop=True, inplace=True) + +# Display the best results DataFrame +best_results.style.apply(highlight_row, n_bits_value=3, axis=1).hide() + +svm_concrete = ConcreteLinearSVC(n_bits=3, C=1, dual=False, penalty="l1") +svm_concrete.fit(X_train, y_train) + +# compile the model 
+circuit = svm_concrete.compile(X_train) + +# the model can now be evaluated with FHE simulation +y_pred = svm_concrete.predict(X_test, fhe="simulate") +accuracy = accuracy_score(y_test, y_pred) +print(f"Accuracy with FHE simulation: {accuracy:.4f}") + +# predict the test set to verify the compiled model accuracy +y_pred = svm_concrete.predict(X_test, fhe="execute") +accuracy = accuracy_score(y_test, y_pred) +print(f"Accuracy with FHE execution: {accuracy:.4f}") + + + +# Code from: ./LinearSVR.ipynb +-------------------------------------------------------------------------------- + +import time + +import numpy as np +import pandas as pd +from sklearn.datasets import load_diabetes +from sklearn.metrics import make_scorer, mean_squared_error +from sklearn.model_selection import GridSearchCV, KFold, train_test_split +from sklearn.svm import LinearSVR as SklearnLinearSVR + +from concrete.ml.sklearn.svm import LinearSVR as ConcreteLinearSVR + +%matplotlib inline + +import matplotlib.pyplot as plt +from IPython.display import display + +train_plot_config = {"c": "black", "marker": "D", "s": 15, "label": "Train data"} +test_plot_config = {"c": "red", "marker": "x", "s": 15, "label": "Test data"} + + +def get_sklearn_plot_config(mse_score=None): + label = "scikit-learn" + if mse_score is not None: + label += f", {'$MSE$'}={mse_score:.4f}" + return {"c": "blue", "linewidth": 2.5, "label": label} + + +def get_concrete_plot_config(mse_score=None): + label = "Concrete ML" + if mse_score is not None: + label += f", {'$MSE$'}={mse_score:.4f}" + return {"c": "orange", "linewidth": 2.5, "label": label} + +# Load the diabetes data-set +X, y = load_diabetes(return_X_y=True) +# Use only one feature for educational purposes +X = X[:, np.newaxis, 2] + +# We split the data-set into a training and a testing set +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=23) + +# We sort the test set for a better visualization +sorted_indexes = np.argsort(np.squeeze(X_test)) +X_test = X_test[sorted_indexes, :] +y_test = y_test[sorted_indexes] + +plt.ioff() + +plt.clf() +fig, ax = plt.subplots(1, figsize=(10, 5)) +fig.patch.set_facecolor("white") +ax.scatter(X_train, y_train, **train_plot_config) +ax.scatter(X_test, y_test, **test_plot_config) +ax.legend() +display(fig) + +grid_scorer = make_scorer(mean_squared_error, greater_is_better=False) + +param_grid = { + "epsilon": [0.0, 1.0, 10.0, 20.0], + "C": [0.1, 100.0, 10000.0, 100000.0], +} + +sklearn_rgs = SklearnLinearSVR() +kfold_cv = KFold(n_splits=5, shuffle=True, random_state=13) + +gs_sklearn = GridSearchCV( + sklearn_rgs, + param_grid, + cv=kfold_cv, + scoring=grid_scorer, + verbose=1, +).fit(X_train, y_train) + +param_grid = { + "n_bits": [6, 8, 12], + "epsilon": [0.0, 1.0, 10.0, 20.0], + "C": [0.1, 100.0, 10000.0, 100000.0], +} + +concrete_rgs = ConcreteLinearSVR() + +gs_concrete = GridSearchCV( + concrete_rgs, + param_grid, + cv=kfold_cv, + scoring=grid_scorer, + verbose=1, +).fit(X_train, y_train) + +plt.ioff() + +results_df = pd.DataFrame(gs_concrete.cv_results_) + +fig, ax = plt.subplots(1, figsize=(12, 8)) +(l1,) = ax.plot( + np.arange(16), -results_df.loc[results_df["param_n_bits"] == 6, "mean_test_score"], "-o" +) +(l2,) = ax.plot( + np.arange(16), -results_df.loc[results_df["param_n_bits"] == 8, "mean_test_score"], "-o" +) +(l3,) = ax.plot( + np.arange(16), -results_df.loc[results_df["param_n_bits"] == 12, "mean_test_score"], "-o" +) +ax.legend((l1, l2, l3), ("n_bits = 6", "n_bits = 8", "n_bits = 12"), loc="upper right", shadow=True) 
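+# Each of the 16 x positions corresponds to one (epsilon, C) combination from the parameter grid, at a fixed n_bits value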
+ax.set_xlabel("Different models with fixed values of C and epsilon") +ax.set_ylabel("Mean MSE accros CV folds") +ax.set_title("Impact of `n_bits` on Cross Validation performances") +display(fig) + +# Print mean time fit and std time fit for both models +print( + f"Mean time fit sklearn: {np.mean(gs_sklearn.cv_results_['mean_fit_time']):.3f}s," + f" std time fit sklearn: {np.std(gs_sklearn.cv_results_['mean_fit_time']):.3f}s" +) +print( + f"Mean time fit concrete: {np.mean(gs_concrete.cv_results_['mean_fit_time']):.3f}s," + f"std time fit concrete: {np.std(gs_concrete.cv_results_['mean_fit_time']):.3f}s" +) + +# Print best score for both models +print(f"Best MSE score sklearn: {-gs_sklearn.best_score_:.2f}") +print(f"Best MSE score concrete: {-gs_concrete.best_score_:.2f}") + +# Get best hyperparameters out of gs_concrete +best_params_concrete = gs_concrete.best_params_ +print(f"Best parameters for Concrete: {best_params_concrete}") +best_params_sklearn = gs_sklearn.best_params_ +print(f"Best parameters for Sklearn: {best_params_sklearn}") + +# Train concrete and sklearn LinearSVR with best hyper parameters +concrete_rgs = ConcreteLinearSVR(**best_params_concrete) + +concrete_rgs, sklearn_rgs = concrete_rgs.fit_benchmark(X_train, y_train) + +# Compile the model using the training data +circuit = concrete_rgs.compile(X_train) + +# Generate the key +print(f"Generating a key for an {circuit.graph.maximum_integer_bit_width()}-bit circuit") + +time_begin = time.time() +circuit.client.keygen(force=False) +print(f"Key generation time: {time.time() - time_begin:.2f} seconds") + +# Now predict using the FHE-quantized model on the testing set +time_begin = time.time() +y_pred_fhe = concrete_rgs.predict(X_test, fhe="execute") +print(f"Execution time: {(time.time() - time_begin) / len(X_test):.4f} seconds per sample") + +# Now predict using the Sklearn model on the testing set +time_begin = time.time() +y_pred_sklearn = sklearn_rgs.predict(X_test) +print(f"Execution time: {(time.time() - time_begin) / len(X_test):.4f} seconds per sample") + +# Now predict using clear quantized Concrete ML model on testing set +time_begin = time.time() +y_preds_quantized = concrete_rgs.predict(X_test) +print(f"Execution time: {(time.time() - time_begin) / len(X_test):.4f} seconds per sample") + +# Print all MSE a string to explain + +mse_sklearn = mean_squared_error(y_test, y_pred_sklearn) +mse_clear = mean_squared_error(y_test, y_preds_quantized) +mse_fhe = mean_squared_error(y_test, y_pred_fhe) + +print( + f"Clear FP32 sklearn model MSE: {mse_sklearn:.3f}\n" + f"Clear quantized model MSE: {mse_clear:.3f}\n" + f"FHE model MSE: {mse_fhe:.3f}" +) + +# Measure the error of the FHE-quantized model with respect to quantized clear Concrete ML model +concrete_score_difference = abs(mse_fhe - mse_clear) * 100 / mse_clear +print( + "\nRelative difference between Concrete-ml (quantized clear) and Concrete-ml (FHE) scores:", + f"{concrete_score_difference:.2f}%", +) + + +# Measure the error of the FHE quantized model with respect to the sklearn float model +score_difference = abs(mse_fhe - mse_sklearn) * 100 / mse_sklearn +print( + "Relative difference between scikit-learn (clear) and Concrete-ml (FHE) scores:", + f"{score_difference:.2f}%", +) + +# We densify the space representation of the original X, +# to better visualize the resulting step function in the following figure +x_space = np.linspace(X_test.min(), X_test.max(), num=300) +x_space = x_space[:, np.newaxis] +y_pred_q_space = concrete_rgs.predict(x_space) + 
+plt.ioff() + +plt.clf() +fig, ax = plt.subplots(1, figsize=(12, 8)) +fig.patch.set_facecolor("white") +ax.scatter(X_train, y_train, **train_plot_config) +ax.scatter(X_test, y_test, **test_plot_config) +ax.plot(X_test, y_pred_sklearn, **get_sklearn_plot_config(mse_sklearn)) +ax.plot(x_space, y_pred_q_space, **get_concrete_plot_config(mse_clear)) +ax.legend() +display(fig) + + + +# Code from: ./LogisticRegressionTraining.ipynb +-------------------------------------------------------------------------------- + +%matplotlib inline +# Import dataset libraries and util functions +from pathlib import Path +from tempfile import TemporaryDirectory + +import matplotlib.pyplot as plt +import numpy as np +from concrete.compiler import check_gpu_available +from matplotlib.colors import ListedColormap +from matplotlib.lines import Line2D +from sklearn import datasets +from sklearn.linear_model import SGDClassifier as SklearnSGDClassifier +from sklearn.metrics import accuracy_score +from sklearn.preprocessing import MinMaxScaler + +from concrete import fhe +from concrete.ml.deployment import FHEModelClient, FHEModelDev, FHEModelServer +from concrete.ml.sklearn import SGDClassifier + +use_gpu_if_available = False +device = "cuda" if use_gpu_if_available and check_gpu_available() else "cpu" + + +def plot_decision_boundary( + X, y, clf=None, weights=None, bias=None, title="Decision Boundary", accuracy=None +): + # Create a mesh to plot the decision boundaries + x_min, x_max = X[:, 0].min() - 0.1, X[:, 0].max() + 0.1 + y_min, y_max = X[:, 1].min() - 0.1, X[:, 1].max() + 0.1 + xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01)) + + if clf is not None: + # Predictions to get the decision boundary + Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]) + Z = Z.reshape(xx.shape) + learned_weights = ( + f"Learned weights: " + f"{clf.coef_[0][0]:.3f}, " + f"{clf.coef_[0][1]:.3f}, " + f"{clf.intercept_.reshape((-1,))[0]:.3f}" + ) + elif weights is not None and bias is not None: + # Compute the linear model for the mesh grid + linear_model = np.dot(np.c_[xx.ravel(), yy.ravel()], weights[0]) + bias[0] + Z = np.round(1 / (1 + np.exp(-linear_model))) + Z = Z.reshape(xx.shape) + learned_weights = "" + else: + raise ValueError("Either 'clf' or both 'weights' and 'bias' must be provided.") + + # Define red and blue color map + cm_bright = ListedColormap(["#FF0000", "#0000FF"]) + + # Plotting the results + plt.figure(figsize=(10, 6)) + plt.contourf(xx, yy, Z, alpha=0.3, cmap=cm_bright) + plt.scatter(X[:, 0], X[:, 1], c=y, edgecolor="k", cmap=cm_bright) + plt.title(f"{title} (Accuracy: {accuracy})\n {learned_weights}") + plt.xlabel("Feature 1") + plt.ylabel("Feature 2") + + # Create a custom legend + legend_elements = [ + Line2D( + [0], + [0], + marker="o", + color="w", + label="Class 0", + markerfacecolor="#FF0000", + markersize=10, + ), + Line2D( + [0], + [0], + marker="o", + color="w", + label="Class 1", + markerfacecolor="#0000FF", + markersize=10, + ), + ] + plt.legend(handles=legend_elements, loc="upper right") + + plt.show() + + +# Load the Iris dataset +X_full, y_full = datasets.load_iris(return_X_y=True) +X_full = MinMaxScaler(feature_range=[-1, 1]).fit_transform(X_full) + +# Select petal length and petal width for visualization +X = X_full[:, 2:4] # Petal length and petal width + +# Filter the dataset for binary classification (Versicolor and Virginica) +# These correspond to target labels 1 and 2 in the Iris dataset +binary_filter = (y_full == 1) | (y_full == 2) +X_binary = 
X[binary_filter] +X_full_binary = X_full[binary_filter] +y_binary = y_full[binary_filter] - 1 + +# Train an SGDClassifier on the binary dataset +N_ITERATIONS = 15 +RANDOM_STATE = 42 + +np.random.seed(RANDOM_STATE) + +model_binary_sklearn = SklearnSGDClassifier(random_state=RANDOM_STATE, max_iter=N_ITERATIONS) + +model_binary_sklearn.fit(X_binary, y_binary) + +y_pred_binary_sklearn = model_binary_sklearn.predict(X_binary) + +accuracy_binary_sklearn = accuracy_score(y_binary, y_pred_binary_sklearn) + +plot_decision_boundary( + X_binary, + y_binary, + clf=model_binary_sklearn, + accuracy=accuracy_binary_sklearn, + title="Scikit-Learn decision boundary", +) + +parameters_range = (-1.0, 1.0) + +model_binary_fhe = SGDClassifier( + random_state=RANDOM_STATE, + max_iter=N_ITERATIONS, + fit_encrypted=True, + parameters_range=parameters_range, + verbose=True, +) + +# Fit on encrypted data +model_binary_fhe.fit(X_binary, y_binary, fhe="execute", device=device) + +# The weights are decrypted at the end of the `fit` call. Use the clear weights here +# to evaluate accuracy on clear data +y_pred_binary = model_binary_fhe.predict(X_binary) + +model_binary_fhe.compile(X_binary) + +# Evaluate the decrypted weights on encrypted data +y_pred_binary_fhe = model_binary_fhe.predict(X_binary, fhe="execute") + +# Check that the same result is obtained when applying +# the decrypted model on clear data and on encrypted data +# Linear classifiers are 100% correct on encrypted data compared to execution on clear data +assert np.all(y_pred_binary == y_pred_binary_fhe) + +accuracy_binary_fhe = accuracy_score(y_binary, y_pred_binary_fhe) + +plot_decision_boundary( + X_binary, + y_binary, + clf=model_binary_fhe, + accuracy=accuracy_binary_fhe, + title="Concrete ML (training on encrypted data with FHE) decision boundary", +) + +from sklearn.model_selection import train_test_split + +X, y = datasets.load_breast_cancer(return_X_y=True) +x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y) + +scaler = MinMaxScaler(feature_range=[-1, 1]) +x_train = scaler.fit_transform(x_train) +x_test = scaler.transform(x_test) + +rng = np.random.default_rng(RANDOM_STATE) +perm = rng.permutation(x_train.shape[0]) + +x_train = x_train[perm, ::] +y_train = y_train[perm] + +parameters_range = (-1.0, 1.0) + +model_sklearn = SklearnSGDClassifier( + random_state=RANDOM_STATE, + max_iter=N_ITERATIONS, +) + +model_sklearn.fit(x_train, y_train) + +y_pred_sklearn = model_sklearn.predict(x_test) + +accuracy_sklearn = accuracy_score(y_test, y_pred_sklearn) + +print(f"Sklearn clear accuracy: {accuracy_sklearn*100:.2f}%") + +model_concrete = SGDClassifier( + random_state=RANDOM_STATE, + max_iter=N_ITERATIONS, + fit_encrypted=True, + parameters_range=parameters_range, +) + +# Train with simulation on the full dataset +model_concrete.fit(x_train, y_train, fhe="simulate") + +model_concrete.compile(x_train) + +# Measure accuracy on the test set using simulation +y_pred_fhe = model_concrete.predict(x_test, fhe="simulate") + +accuracy_fhe = accuracy_score(y_test, y_pred_fhe) +print(f"Full encrypted fit (simulated) accuracy: {accuracy_fhe*100:.2f}%") + +# To measure accuracy after every batch initialize the SGDClassifier with warm_start=True +# which keeps the weights obtained with previous batches + +model_concrete_partial = SGDClassifier( + random_state=RANDOM_STATE, + max_iter=N_ITERATIONS, + fit_encrypted=True, + parameters_range=parameters_range, + warm_start=True, +) + +batch_size = model_concrete_partial.batch_size + 
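+# Quick check (a small sketch added for illustration, not in the original notebook): the training +# circuit fixes 'batch_size', so the loop below only visits full batches and drops any remainder. +n_full_batches = x_train.shape[0] // batch_size +print(f"Training on {n_full_batches} full batches of size {batch_size}") + 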
+classes = np.unique(y_train) + +# Go through the training batches +accuracy_scores = [] +for idx in range(x_train.shape[0] // batch_size): + batch_range = range(idx * batch_size, (idx + 1) * batch_size) + x_batch = x_train[batch_range, ::] + y_batch = y_train[batch_range] + + # Fit on a single batch with partial_fit + # Provide the list of all expected classes for the first iteration, as done in scikit-learn + if idx == 0: + model_concrete_partial.partial_fit(x_batch, y_batch, classes=classes, fhe="simulate") + else: + model_concrete_partial.partial_fit(x_batch, y_batch, fhe="simulate") + + model_concrete_partial.compile(x_train) + + # Measure accuracy of the model with FHE simulation + y_pred_partial_fhe = model_concrete_partial.predict(x_test, fhe="simulate") + + accuracy_partial = accuracy_score(y_test, y_pred_partial_fhe) + accuracy_scores.append(accuracy_partial) + +# Plot the evolution of accuracy throughout the training process +fig = plt.figure() +plt.plot(accuracy_scores) +plt.title(f"Accuracy evolution on breast-cancer. Final accuracy {accuracy_scores[-1]*100:.2f}%") +plt.xlabel("Batch number") +plt.ylabel("Accuracy") +plt.grid(True) +plt.show() + +# Initialize the model with parameters +parameters_range = (-1.0, 1.0) +batch_size = 8 + +sgd_clf_binary_fhe = SGDClassifier( + random_state=RANDOM_STATE, + max_iter=N_ITERATIONS, + fit_encrypted=True, + parameters_range=parameters_range, +) + +# Generate the min and max values for X_binary and y_binary +x_min, x_max = X_binary.min(axis=0), X_binary.max(axis=0) +y_min, y_max = y_binary.min(), y_binary.max() + +# Create a dataset with the min and max values for each feature, repeated to fill the batch size +x_compile_set = np.vstack([x_min, x_max] * (batch_size // 2)) + +# Create a dataset with the min and max values for y, repeated to fill the batch size +y_compile_set = np.array([y_min, y_max] * (batch_size // 2)) + +# Fit the model with the created dataset to compile it for production +# This step ensures the model knows the number of features, the targets, and the feature distributions + + +device = "cuda" if check_gpu_available() else "cpu" +sgd_clf_binary_fhe.fit(x_compile_set, y_compile_set, fhe="disable", device=device) + +# Define the directory where to save the deployment files +DEPLOYMENT_PATH = Path("fhe_training") +DEPLOYMENT_PATH.mkdir(exist_ok=True) + +deployment_dir = TemporaryDirectory(dir=str(DEPLOYMENT_PATH)) # pylint: disable=consider-using-with +deployment_path = Path(deployment_dir.name) + +# Save the training FHE circuit for production +fhe_dev = FHEModelDev(deployment_path, sgd_clf_binary_fhe) +fhe_dev.save(mode="training") + +# On the client side, load the circuit.zip with the information to create +# - the key +# - the pre- and post-processing functions + +fhe_client = FHEModelClient(deployment_path) +fhe_client.load() +serialized_evaluation_keys = fhe_client.get_serialized_evaluation_keys() + +# On the server side, we load the server.zip which contains the training model +fhe_server = FHEModelServer(deployment_path) +fhe_server.load() + +# Define utility functions to evaluate the model + + +def model_inference(weights, bias, X): + # Compute the linear model + linear_model = np.dot(X, weights[0]) + bias[0] + + # Apply the sigmoid function + sigmoid = 1 / (1 + np.exp(-linear_model)) + + # Compute the prediction + prediction = np.round(sigmoid) + + return prediction + + +def compute_model_accuracy(weights, bias, X, y): + # Compute the prediction + prediction = model_inference(weights, bias, X).squeeze() + + # Compute the 
accuracy + return np.mean(prediction == y) + +batch_size = sgd_clf_binary_fhe.batch_size + +# Shuffle X_binary and y_binary +perm = np.random.permutation(X_binary.shape[0]) +X_binary = X_binary[perm, ::] +y_binary = y_binary[perm] + +# Initialize the weight and bias randomly +# They are going to be updated using FHE training. +weights = np.random.rand(1, X_binary.shape[1], 1) +bias = np.random.rand(1, 1, 1) + +# Plot the decision boundaries before starting +plot_decision_boundary( + X_binary, + y_binary, + weights=weights, + bias=bias, + title="Decision Boundary before training", + accuracy=compute_model_accuracy(weights, bias, X_binary, y_binary), +) + + +def quantize_encrypt_serialize_batches(fhe_client, x, y, weights, bias, batch_size): + x_batches_enc, y_batches_enc = [], [] + + for i in range(0, x.shape[0], batch_size): + + # Avoid the last batch if it's not a multiple of 'batch_size' + if i + batch_size < x.shape[0]: + batch_range = range(i, i + batch_size) + else: + break + + # Make the data X (1, batch_size, n_features) and y (1, batch_size, n_targets=1) + x_batch = np.expand_dims(x[batch_range, :], 0) + y_batch = np.expand_dims(y[batch_range], (0, 2)) + + # Encrypt the batch + x_batch_enc, y_batch_enc, _, _ = fhe_client.quantize_encrypt_serialize( + x_batch, y_batch, None, None + ) + + x_batches_enc.append(x_batch_enc) + y_batches_enc.append(y_batch_enc) + + _, _, weights_enc, bias_enc = fhe_client.quantize_encrypt_serialize(None, None, weights, bias) + + return x_batches_enc, y_batches_enc, weights_enc, bias_enc + + +def server_run(fhe_server, x_batches_enc, y_batches_enc, weights_enc, bias_enc, evaluation_keys): + + weights_enc = fhe.Value.deserialize(weights_enc) + bias_enc = fhe.Value.deserialize(bias_enc) + + evaluation_keys = fhe.EvaluationKeys.deserialize(evaluation_keys) + + # Run the circuit on the server n times, n being the number of batches sent by the user + for x_batch, y_batch in zip(x_batches_enc, y_batches_enc): + x_batch = fhe.Value.deserialize(x_batch) + y_batch = fhe.Value.deserialize(y_batch) + + weights_enc, bias_enc = fhe_server.run( + (x_batch, y_batch, weights_enc, bias_enc), evaluation_keys + ) + + weights_enc = weights_enc.serialize() + bias_enc = bias_enc.serialize() + + return weights_enc, bias_enc + + +def train_fhe_client_server( + x, + y, + batch_size, + fhe_client, + fhe_server, + serialized_evaluation_keys, + weights, + bias, + n_epochs=1, +): + acc_history = [] + + for epoch in range(n_epochs): + # Shuffle x and y + perm = np.random.permutation(x.shape[0]) + x = x[perm, ::] + y = y[perm] + + # Quantize, encrypt and serialize the batched inputs as well as the weight and bias values + x_batches_enc, y_batches_enc, weights_enc, bias_enc = quantize_encrypt_serialize_batches( + fhe_client, x, y, weights, bias, batch_size + ) + + # Iterate the circuit over the batches on the server + fitted_weights_enc, fitted_bias_enc = server_run( + fhe_server, + x_batches_enc, + y_batches_enc, + weights_enc, + bias_enc, + serialized_evaluation_keys, + ) + + # Back on the client, deserialize, decrypt and de-quantize the fitted weight and bias values + weights, bias = fhe_client.deserialize_decrypt_dequantize( + fitted_weights_enc, fitted_bias_enc + ) + + # Compute, store and print the epoch's accuracy + accuracy_score = compute_model_accuracy(weights, bias, x, y) + acc_history.append(accuracy_score) + + print(f"Epoch {epoch + 1}/{n_epochs} completed. 
Accuracy: {acc_history[-1]}") + + return weights, bias, acc_history + + +weights, bias, acc_history = train_fhe_client_server( + X_binary, + y_binary, + batch_size, + fhe_client, + fhe_server, + serialized_evaluation_keys, + weights, + bias, +) + +# Plot the final model's decision boundary +plot_decision_boundary( + X_binary, + y_binary, + weights=weights, + bias=bias, + title="Decision Boundary after training", + accuracy=acc_history[-1], +) + +# Let's rotate the dataset 90 degrees and see +# if the model can learn the new dataset + +# Define the 90-degree rotation matrix +rotation_matrix = np.array([[0, -1], [1, 0]]) + +# Apply the rotation matrix to X_binary +X_binary_pivoted = X_binary @ rotation_matrix + +# Plot before training +plot_decision_boundary( + X_binary_pivoted, + y_binary, + weights=weights, + bias=bias, + title="Pivoted Dataset", + accuracy=compute_model_accuracy(weights, bias, X_binary_pivoted, y_binary), +) + +# Train the model again with the pivoted dataset +weights_pivoted, bias_pivoted, acc_history_pivoted = train_fhe_client_server( + X_binary_pivoted, + y_binary, + batch_size, + fhe_client, + fhe_server, + serialized_evaluation_keys, + weights, + bias, + n_epochs=2, +) + +# Plot the decision boundary for the pivoted dataset +plot_decision_boundary( + X_binary_pivoted, + y_binary, + weights=weights_pivoted, + bias=bias_pivoted, + title="Decision Boundary after training on pivoted dataset", + accuracy=acc_history_pivoted[-1], +) + +# Clean the temporary directories and their content +deployment_dir.cleanup() + + + +# Code from: ./QuantizationAwareTraining.ipynb +-------------------------------------------------------------------------------- + +import time + +import matplotlib.pyplot as plt +import numpy +import torch +from sklearn.model_selection import train_test_split +from torch import nn +from torch.utils.data import DataLoader, TensorDataset +from tqdm.auto import tqdm + +from concrete.ml.quantization.quantized_module import QuantizedModule +from concrete.ml.torch.compile import compile_brevitas_qat_model + +IN_FEAT = 2 +OUT_FEAT = 2 +N_SIDE = 100 +N_EXAMPLE_TOTAL = N_SIDE * N_SIDE +N_TEST = 500 +CLUSTERS = 3 + +# Generate the grid points and put them in a 2-column array of X,Y coordinates +xx, yy = numpy.meshgrid(numpy.linspace(0, 1, N_SIDE), numpy.linspace(0, 1, N_SIDE)) +X = numpy.c_[numpy.ravel(xx), numpy.ravel(yy)] + +# Generate the labels, using the XOR function to produce the checkerboard +y = (numpy.rint(xx * CLUSTERS).astype(numpy.int64) % 2) ^ ( + (numpy.rint(yy * CLUSTERS).astype(numpy.int64) % 2) +) +y = y.ravel() + +# Add some noise to the data +X += numpy.random.randn(X.shape[0], X.shape[1]) * 0.01 + +# Plot the data +plt.scatter(X[:, 0], X[:, 1], c=y) +plt.title("Original dataset") +plt.show() + +# And, finally, split it into train/test sets +X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=N_TEST / N_EXAMPLE_TOTAL, random_state=42 +) + +# pylint: disable-next=too-many-arguments +def train( + torch_model, + X_train, + X_test, + y_train, + y_test, + criterion, + optimizer, + epochs=10, + batch_size=1, + shuffle=True, + device="cpu", +): + X_train = torch.tensor(X_train).float() + X_test = torch.tensor(X_test).float() + y_train = torch.tensor(y_train) + + train_loader = DataLoader( + TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=shuffle + ) + torch_model.train() + for epoch in range(epochs): + total_loss = [] + y_pred_all = [] + y_true_all = [] + + for batch_index, (X_batch, y_batch) in enumerate(train_loader): 
# Forward pass + X_batch = X_batch.to(device) + y_batch = y_batch.to(device) + y_pred = torch_model(X_batch) + y_pred_all.append(y_pred.argmax(1).detach().cpu().numpy()) + y_true_all.append(y_batch.detach().cpu().numpy()) + + # Compute loss + loss = criterion(y_pred, y_batch) + if torch.isnan(loss): + print("y_pred", y_pred) + print("y_batch", y_batch) + raise ValueError(f"Loss diverged at step: {batch_index}") + + # Backward pass + optimizer.zero_grad() + loss.backward() + + # Update weights + optimizer.step() + + total_loss.append(loss.cpu().item()) + + # Print epoch number, loss and accuracy + y_pred_all = numpy.concatenate(y_pred_all) + y_true_all = numpy.concatenate(y_true_all) + accuracy = numpy.mean(y_pred_all == y_true_all) + print( + f"Epoch: {epoch:02} | Loss: {numpy.mean(total_loss):.4f} |" + f" Train Accuracy: {100*accuracy:.2f}%" + ) + + # Compute test accuracy once training is done + torch_model.eval() + fp32_pred = torch_model(X_test.to(device)).cpu().argmax(1).float().detach().numpy() + accuracy = numpy.mean(fp32_pred == y_test) + print(f"\nTest Accuracy Fp32: {accuracy*100:.2f}%") + + return accuracy + +def test_in_fhe(quantized_numpy_module, X_test, y_test, simulate=True): + if not simulate: + print("Generating key") + start_key = time.time() + quantized_numpy_module.fhe_circuit.keygen() + end_key = time.time() + print(f"Key generation finished in {end_key - start_key:.2f} seconds") + + fhe_mode = "simulate" if simulate else "execute" + + start_infer = time.time() + predictions = quantized_numpy_module.forward(X_test, fhe=fhe_mode).argmax(1) + end_infer = time.time() + + if not simulate: + print( + f"Inferences finished in {end_infer - start_infer:.2f} seconds " + f"({(end_infer - start_infer)/len(X_test):.2f} seconds/sample)" + ) + + # Compute accuracy + accuracy = numpy.mean(predictions == y_test) * 100 + print( + "FHE " + ("(simulation) " * simulate) + f"accuracy: {accuracy:.2f}% on " + f"{len(X_test)} examples." 
+ ) + return predictions + +import brevitas.nn as qnn +from brevitas.core.bit_width import BitWidthImplType +from brevitas.core.quant import QuantType +from brevitas.core.restrict_val import FloatToIntImplType, RestrictValueType +from brevitas.core.scaling import ScalingImplType +from brevitas.core.zero_point import ZeroZeroPoint +from brevitas.inject import ExtendedInjector +from brevitas.quant.solver import ActQuantSolver, WeightQuantSolver +from dependencies import value +from torch.nn.utils import prune + + +# More details on injectors at +# https://github.com/Xilinx/brevitas/blob/master/ARCHITECTURE.md#injectors-and-quantizers +class CommonQuant(ExtendedInjector): + bit_width_impl_type = BitWidthImplType.CONST + scaling_impl_type = ScalingImplType.CONST + restrict_scaling_type = RestrictValueType.FP + zero_point_impl = ZeroZeroPoint + float_to_int_impl_type = FloatToIntImplType.ROUND + scaling_per_output_channel = False + narrow_range = True + signed = True + + @value + def quant_type(bit_width): # pylint: disable=no-self-argument + if bit_width is None: + return QuantType.FP + if bit_width == 1: + return QuantType.BINARY + return QuantType.INT + + +class CommonWeightQuant(CommonQuant, WeightQuantSolver): # pylint: disable=too-many-ancestors + scaling_const = 1.0 + signed = True + + +class CommonActQuant(CommonQuant, ActQuantSolver): # pylint: disable=too-many-ancestors + min_val = -1.0 + max_val = 1.0 + +class QATPrunedSimpleNet(nn.Module): + def __init__(self, n_hidden, qlinear_args, qidentity_args): + super().__init__() + + self.pruned_layers = set() + + self.quant_inp = qnn.QuantIdentity(**qidentity_args) + + self.fc1 = qnn.QuantLinear(IN_FEAT, n_hidden, **qlinear_args) + + self.relu1 = qnn.QuantReLU(bit_width=qidentity_args["bit_width"]) + + self.fc2 = qnn.QuantLinear(n_hidden, n_hidden, **qlinear_args) + + self.relu2 = qnn.QuantReLU(bit_width=qidentity_args["bit_width"]) + + self.fc3 = qnn.QuantLinear(n_hidden, OUT_FEAT, **qlinear_args) + + for m in self.modules(): + if isinstance(m, qnn.QuantLinear): + torch.nn.init.uniform_(m.weight.data, -1, 1) + + def forward(self, x): + x = self.quant_inp(x) + x = self.relu1(self.fc1(x)) + x = self.relu2(self.fc2(x)) + x = self.fc3(x) + return x + + def prune(self, max_non_zero): + # Linear layer weight has dimensions NumOutputs x NumInputs + for name, layer in self.named_modules(): + if isinstance(layer, qnn.QuantLinear): + num_zero_weights = (layer.weight.shape[1] - max_non_zero) * layer.weight.shape[0] + if num_zero_weights <= 0: + continue + print(f"Pruning layer {name} factor {num_zero_weights}") + prune.l1_unstructured(layer, "weight", amount=num_zero_weights) + self.pruned_layers.add(name) + + def unprune(self): + for name, layer in self.named_modules(): + if name in self.pruned_layers: + prune.remove(layer, "weight") + self.pruned_layers.remove(name) + +# Add MPS (for macOS with Apple Silicon or AMD GPUs) support when error is fixed. For now, we +# observe a decrease in torch's top1 accuracy when using MPS devices +# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3953 +device = "cuda" if torch.cuda.is_available() else "cpu" + +# Define our loss function +criterion = nn.CrossEntropyLoss() + +# Define the batch size +batch_size = 1 +n_epochs = 7 +n_hidden = 100 + +# We use 100 neurons with only 20 that will be active. 
Having many neurons +# out of which we choose the best ones increases the robustness of training +# while keeping the accumulator size low +torch_model = QATPrunedSimpleNet( + n_hidden=n_hidden, + qlinear_args={ + "weight_bit_width": 3, + "weight_quant": CommonWeightQuant, + "bias": True, + "bias_quant": None, + "narrow_range": True, + }, + qidentity_args={"bit_width": 3, "act_quant": CommonActQuant}, +) +torch_model.prune(20) + +torch_model = torch_model.to(device) +optimizer = torch.optim.AdamW(torch_model.parameters(), lr=0.001) +accuracy = train( + torch_model, + X_train, + X_test, + y_train, + y_test, + criterion, + optimizer, + epochs=n_epochs, + batch_size=batch_size, + device=device, +) +torch_model.unprune() + +torch_model.eval() +# pylint: disable=not-callable +fp32_pred = ( + torch_model(torch.tensor(X_test).float().to(device)).cpu().argmax(1).float().detach().numpy() +) + +# pylint: enable=not-callable + +plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test.astype(numpy.float64)) +plt.title("Original test set") +plt.show() + +plt.scatter(X_test[:, 0], X_test[:, 1], c=fp32_pred) +plt.title("Torch: Predictions on test set") +plt.show() + +# We need to unprune the model before compiling +torch_model.unprune() + +# Move torch_model to CPU +torch_model = torch_model.cpu() + +# Compile the model using a representative input-set +quantized_numpy_module = compile_brevitas_qat_model(torch_model, X_train) + +prediction_simulated = test_in_fhe(quantized_numpy_module, X_test, y_test, simulate=True) + +# Reduce the test set for faster running time +FHE_SAMPLE = 10 + +prediction_fhe = test_in_fhe( + quantized_numpy_module, X_test[:FHE_SAMPLE], y_test[:FHE_SAMPLE], simulate=False +) + +class TorchSKLearnWrapper: + def __init__(self, torch_model): + self.torch_model = torch_model + self.fitted = True + + def fit(self): + return self + + @staticmethod + def __sklearn_is_fitted__(): + return True + + def predict(self, X): + self.torch_model.eval() + y_pred = self.torch_model(torch.tensor(X).float()).argmax(1).float().detach().numpy() + return y_pred + + def predict_proba(self, X): + self.torch_model.eval() + y_pred = self.torch_model(torch.tensor(X).float())[:, 1].float().detach().numpy() + return y_pred + +class ConcreteSKLearnWrapper: + def __init__(self, quantized_module: QuantizedModule): + self.quantized_module = quantized_module + self.fitted = True + + def fit(self): + return self + + @staticmethod + def __sklearn_is_fitted__(): + return True + + def predict(self, X, progress_bar=False): + predictions = numpy.zeros((X.shape[0],)) + for idx, x in enumerate(tqdm(X, disable=not progress_bar)): + predictions[idx] = self.quantized_module.forward( + numpy.expand_dims(x, 0), fhe="simulate" + ).argmax(axis=1) + return predictions + + def predict_proba(self, X, progress_bar=False): + predictions = numpy.zeros(shape=(X.shape[0], 2)) + for idx, x in enumerate(tqdm(X, disable=not progress_bar)): + predictions[idx] = self.quantized_module.forward( + numpy.expand_dims(x, 0), fhe="simulate" + )[0] + return predictions + +plt.scatter(X_test[:, 0], X_test[:, 1], c=prediction_simulated) +plt.title("Concrete ML predictions on test set") +plt.show() + +epsilon = 0.1 +base = 5 +max_value = 1 + epsilon +min_value = 0 - epsilon +grid_resolution = 100 +fig, axs = plt.subplots(figsize=(base * 3, base), ncols=3) +for ax in axs: + ax.set_xlim([min_value, max_value]) + ax.set_ylim([min_value, max_value]) + +xx0, xx1 = numpy.meshgrid( + numpy.linspace(min_value, max_value, grid_resolution), + numpy.linspace(min_value, 
max_value, grid_resolution), +) + +X_grid = numpy.c_[xx0.ravel(), xx1.ravel()] +y_pred_torch = TorchSKLearnWrapper(torch_model).predict(X_grid) +y_pred_concrete = ConcreteSKLearnWrapper(quantized_numpy_module).predict(X_grid) + +axs[1].contourf(xx0, xx1, y_pred_torch.reshape(xx0.shape)) +axs[2].contourf(xx0, xx1, y_pred_concrete.reshape(xx0.shape)) + +axs[0].scatter(X_test[:, 0], X_test[:, 1], c=prediction_simulated, marker="x") +axs[0].set_title("Ground truth") +axs[1].set_title("Float32 predictions") +axs[2].set_title("Concrete ML predictions") +plt.show() + + + +# Code from: ./PoissonRegression.ipynb +-------------------------------------------------------------------------------- + +import time + +import numpy as np +import sklearn +from sklearn.datasets import fetch_openml +from sklearn.linear_model import PoissonRegressor as SklearnPoissonRegressor +from sklearn.metrics import mean_poisson_deviance +from sklearn.model_selection import train_test_split + +from concrete.ml.sklearn import PoissonRegressor as ConcretePoissonRegressor + +%matplotlib inline + +import matplotlib.pyplot as plt +from IPython.display import display + +df, _ = fetch_openml( + data_id=41214, as_frame=True, cache=True, data_home="~/.cache/sklearn", return_X_y=True +) +df = df.head(50000) + +df["Frequency"] = df["ClaimNb"] / df["Exposure"] + +plt.ioff() +fig, ax = plt.subplots(1, 2, figsize=(15, 7)) +fig.patch.set_facecolor("white") +ax[0].set_title("Frequency of claims vs. Driver Age") +ax[0].set_xlabel("Driver Age") +ax[0].set_ylabel("Frequency of claims") +ax[0].scatter(df["DrivAge"], df["Frequency"], marker="o", color="#ffb700") +ax[1].set_title("Histogram of Frequency of claims") +ax[1].set_xlabel("Frequency of claims") +ax[1].set_ylabel("Count") +df["Frequency"].hist(bins=30, log=True, ax=ax[1], color="black") +display(fig) + +df_train, df_test = train_test_split(df, test_size=0.2, random_state=0) + +train_data = df_train["DrivAge"].values.reshape(-1, 1).astype(np.float64) +test_data = np.sort(df_test["DrivAge"].values).reshape(-1, 1).astype(np.float64) + +sklearn_pr = SklearnPoissonRegressor(max_iter=300) +sklearn_pr.fit(train_data, df_train["Frequency"], sample_weight=df_train["Exposure"]); + +sklearn_predictions = sklearn_pr.predict(test_data) + +plt.clf() +fig, ax = plt.subplots(1, figsize=(12, 8)) +fig.patch.set_facecolor("white") +ax.plot(test_data, sklearn_predictions, color="black", label="Float clear trend line") +ax.scatter(df_test["DrivAge"], df_test["Frequency"], marker="o", color="#ffb700") +ax.set_xlabel("Driver Age") +ax.set_ylim(0, 10) +ax.set_title("Regression with sklearn") +ax.set_ylabel("Frequency of claims") +ax.legend(loc="upper right") +display(fig) + +concrete_pr = ConcretePoissonRegressor(n_bits=8) +concrete_pr.fit(train_data, df_train["Frequency"], sample_weight=df_train["Exposure"]) + +concrete_predictions = concrete_pr.predict(test_data) + +y_true = df_test["Frequency"] +sample_weight = df_test["Exposure"] + +sklearn_score = mean_poisson_deviance(y_true, sklearn_predictions, sample_weight=sample_weight) +concrete_score = mean_poisson_deviance(y_true, concrete_predictions, sample_weight=sample_weight) + +print(f"mean Poisson deviance (scikit-learn): {sklearn_score:.4f}") +print(f"mean Poisson deviance (Concrete ML): {concrete_score:.4f}") + +plt.clf() +fig, ax = plt.subplots(1, figsize=(12, 8)) +fig.patch.set_facecolor("white") + +# Plot the scikit-learn in clear model's main trend line +ax.plot( + test_data, + sklearn_predictions, + color="black", + label=f"scikit-learn float, 
d={sklearn_score:.3f}", +) + +# Plot the Concrete quantized in clear model's main trend line +ax.plot( + test_data, + concrete_predictions, + color="red", + label=f"Concrete ML quantized, d={concrete_score:.3f}", +) + +# Plot the test data +ax.scatter(df_test["DrivAge"], df_test["Frequency"], marker="o", color="gray", label="Test data") + +# Parametrize the main figure +ax.set_xlabel("Driver Age") +ax.set_ylim(0, 10) +ax.set_title("Poisson Regression, float in clear and quantized in clear trend lines") +ax.set_ylabel("Frequency of claims") +ax.legend(loc="upper left") +ax.grid() + + +# Set a zoomed-in figure +axins = ax.inset_axes([0.5, 0.5, 0.47, 0.47]) + +# Plot the scikit-learn in clear model's zoomed trend line +axins.plot( + test_data, + sklearn_predictions, + color="black", +) + +# Plot the Concrete quantized in clear model's zoomed trend line +axins.plot( + test_data, + concrete_predictions, + color="red", +) + +# Parametrize the zoomed figure +x1, x2, y1, y2 = 60, 65, 0.3, 0.7 +axins.set_xlim(x1, x2) +axins.set_ylim(y1, y2) +axins.grid() +ax.indicate_inset_zoom(axins, edgecolor="black") + +display(fig) + +fhe_circuit = concrete_pr.compile(train_data) + +print(f"Generating a key for an {fhe_circuit.graph.maximum_integer_bit_width()}-bit circuit") + +time_begin = time.time() +fhe_circuit.client.keygen(force=False) +print(f"Key generation time: {time.time() - time_begin:.4f} seconds") + +time_begin = time.time() +concrete_predictions_fhe = concrete_pr.predict(test_data, fhe="execute") +print(f"Execution time: {(time.time() - time_begin) / len(test_data):.4f} seconds per sample") + +concrete_fhe_score = mean_poisson_deviance( + y_true, concrete_predictions_fhe, sample_weight=sample_weight +) + +print(f"mean Poisson deviance (Concrete FHE): {concrete_fhe_score:.4f}") + +plt.clf() +fig, ax = plt.subplots(1, figsize=(12, 8)) +fig.patch.set_facecolor("white") + +# Plot the scikit-learn in clear model's main trend line +ax.plot( + test_data, + sklearn_predictions, + color="black", + label=f"scikit-learn float, d={sklearn_score:.3f}", +) + +# Plot the Concrete quantized in clear model's main trend line +ax.plot( + test_data, + concrete_predictions, + color="red", + label=f"Concrete ML quantized, d={concrete_score:.3f}", +) + +# Plot the Concrete FHE model's main trend line +ax.plot( + test_data, + concrete_predictions_fhe, + color="blue", + label=f"Concrete ML FHE, d={concrete_fhe_score:.3f}", +) + +# Plot the test data +ax.scatter(df_test["DrivAge"], df_test["Frequency"], marker="o", color="gray", label="Test data") + +# Parametrize the main figure +ax.set_xlabel("Driver Age") +ax.set_ylim(0, 10) +ax.set_title("Poisson Regression, float in clear, quantized in clear and FHE trend lines") +ax.set_ylabel("Frequency of claims") +ax.legend(loc="upper left") +ax.grid() + +# Set a zoomed-in figure +axins = ax.inset_axes([0.5, 0.5, 0.47, 0.47]) + +# Plot the scikit-learn in clear model's zoomed trend line +axins.plot( + test_data, + sklearn_predictions, + color="black", +) + +# Plot the Concrete quantized in clear model's zoomed trend line +axins.plot( + test_data, + concrete_predictions, + color="red", +) + +# Plot the Concrete FHE model's zoomed trend line +axins.plot( + test_data, + concrete_predictions_fhe, + color="blue", +) + +# Parametrize the zoomed figure +x1, x2, y1, y2 = 60, 65, 0.3, 0.7 +axins.set_xlim(x1, x2) +axins.set_ylim(y1, y2) +axins.grid() +ax.indicate_inset_zoom(axins, edgecolor="black") + +display(fig) + +import warnings + +from sklearn.compose import ColumnTransformer +from sklearn.pipeline 
import Pipeline, make_pipeline +from sklearn.preprocessing import ( + FunctionTransformer, + KBinsDiscretizer, + OneHotEncoder, + StandardScaler, +) + +warnings.filterwarnings("ignore") + +sklearn_sparse_arg = ( + {"sparse": False} if "1.1." in sklearn.__version__ else {"sparse_output": False} +) + +log_scale_transformer = make_pipeline(FunctionTransformer(np.log, validate=False), StandardScaler()) + +linear_model_preprocessor = ColumnTransformer( + [ + ("passthrough_numeric", "passthrough", ["BonusMalus"]), + ("binned_numeric", KBinsDiscretizer(n_bins=10), ["VehAge", "DrivAge"]), + ("log_scaled_numeric", log_scale_transformer, ["Density"]), + ( + "onehot_categorical", + OneHotEncoder(**sklearn_sparse_arg), + ["VehBrand", "VehPower", "VehGas", "Region", "Area"], + ), + ], + remainder="drop", +) + +sklearn_pr = Pipeline( + [ + ("preprocessor", linear_model_preprocessor), + ("regressor", SklearnPoissonRegressor()), + ] +) + +n_bits = 16 +concrete_pr = Pipeline( + [ + ("preprocessor", linear_model_preprocessor), + ("regressor", ConcretePoissonRegressor(n_bits=n_bits)), + ] +) + +sklearn_pr.fit(df_train, df_train["Frequency"], regressor__sample_weight=df_train["Exposure"]) + +concrete_pr.fit(df_train, df_train["Frequency"], regressor__sample_weight=df_train["Exposure"]); + +def score_estimator(estimator, df_test, fhe="disable"): + """Score an estimator on the test set.""" + + if fhe == "execute": + time_begin = time.time() + y_pred = estimator.predict(df_test, fhe="execute") + print( + f"FHE execution time: {(time.time() - time_begin) / len(df_test):.4f} " + "seconds per sample\n" + ) + + else: + y_pred = estimator.predict(df_test) + + y_pred = np.squeeze(y_pred) + y_true = df_test["Frequency"] + sample_weight = df_test["Exposure"] + + # Ignore non-positive predictions, as they are invalid for the Tweedie deviance (except if + # power is equal to 0, making the model equivalent to a Linear Regression). We want to + # issue a warning if for some reason (e.g., low quantization, user error), the regressor + # predictions are negative. + + # Find all strictly positive values + mask = y_pred > 0 + + # If any non-positive values are found, issue a warning + if (~mask).any(): + n_masked, n_samples = (~mask).sum(), mask.shape[0] + print( + "WARNING: Estimator yields invalid, non-positive predictions " + f"for {n_masked} samples out of {n_samples}. These predictions " + "are ignored when computing the Poisson deviance." 
+        )
+
+    return mean_poisson_deviance(y_true[mask], y_pred[mask], sample_weight=sample_weight[mask])
+
+sklearn_score = score_estimator(sklearn_pr, df_test)
+concrete_score = score_estimator(concrete_pr, df_test)
+
+print(f"scikit-learn (clear) deviance score: {sklearn_score:.4f}")
+print(f"Concrete ML (quantized, clear) deviance score: {concrete_score:.4f}")
+
+# Measure the error of the quantized model with respect to the clear scikit-learn
+# float model
+score_difference = abs(concrete_score - sklearn_score) * 100 / sklearn_score
+print(
+    "Relative difference between scikit-learn (clear) and Concrete ML (quantized) scores:",
+    f"{score_difference:.2f}%\n",
+)
+
+n_bits_values = list(range(2, 20))
+concrete_deviance_scores = []
+for n_bits in n_bits_values:
+    concrete_regressor = Pipeline(
+        [
+            ("preprocessor", linear_model_preprocessor),
+            ("regressor", ConcretePoissonRegressor(n_bits=n_bits)),
+        ]
+    )
+    concrete_regressor.fit(
+        df_train, df_train["Frequency"], regressor__sample_weight=df_train["Exposure"]
+    )
+    concrete_deviance_scores.append(score_estimator(concrete_regressor, df_test))
+
+plt.clf()
+fig, ax = plt.subplots(1, figsize=(12, 8))
+fig.patch.set_facecolor("white")
+ax.hlines(y=sklearn_score, xmin=2, xmax=19, color="r", label="scikit-learn")
+ax.plot(n_bits_values, concrete_deviance_scores, label="Concrete ML")
+ax.set_xlabel("Number of bits")
+ax.set_ylabel("Poisson deviance")
+ax.set_xticks(n_bits_values)
+ax.set_xticklabels([str(k) for k in n_bits_values])
+ax.grid()
+ax.legend(loc="upper right")
+display(fig)
+
+n_bits = 11
+
+poisson_regressor_fhe = Pipeline(
+    [
+        ("preprocessor", linear_model_preprocessor),
+        ("regressor", ConcretePoissonRegressor(n_bits=n_bits)),
+    ]
+)
+poisson_regressor_fhe.fit(
+    df_train, df_train["Frequency"], regressor__sample_weight=df_train["Exposure"]
+);
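+
+# A cheaper dry run (minimal sketch, assuming the compilation step right below has been
+# run first): FHE simulation executes the quantized circuit on clear data and is
+# expected to closely match the quantized clear predictions.
+#
+#     y_pred_simulated = poisson_regressor_fhe.predict(df_test, fhe="simulate")
+#     y_pred_quantized = poisson_regressor_fhe.predict(df_test)
+#     print(np.abs(y_pred_simulated - y_pred_quantized).max())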
+
+# Compilation needs some preprocessed data in order to run
+df_test_processed = poisson_regressor_fhe["preprocessor"].transform(df_test)
+
+# pylint: disable-next=no-member
+fhe_circuit = poisson_regressor_fhe["regressor"].compile(df_test_processed)
+
+print(f"Generating a key for an {fhe_circuit.graph.maximum_integer_bit_width()}-bit circuit")
+
+time_begin = time.time()
+fhe_circuit.client.keygen(force=False)
+print(f"Key generation time: {time.time() - time_begin:.4f} seconds")
+
+# Reducing the test set from 10000 to 1000 for faster FHE execution
+df_test = df_test[:1000]
+
+concrete_score_fhe = score_estimator(poisson_regressor_fhe, df_test, fhe="execute")
+
+print(f"scikit-learn (clear) deviance score: {score_estimator(sklearn_pr, df_test):.4f}")
+print(f"Concrete ML (FHE) deviance score: {concrete_score_fhe:.4f}")
+
+# Measure the error of the FHE quantized model with respect to the clear scikit-learn
+# float model
+score_difference = abs(concrete_score_fhe - sklearn_score) * 100 / sklearn_score
+print(
+    "Relative difference between scikit-learn (clear) and Concrete ML (FHE) scores:",
+    f"{score_difference:.2f}%\n",
+)
+
+
+
+# Code from: ./XGBClassifier.ipynb
+--------------------------------------------------------------------------------
+
+import warnings
+
+warnings.simplefilter(action="ignore", category=FutureWarning)
+
+import time
+
+import matplotlib.pyplot as plt
+import numpy
+from concrete.compiler import check_gpu_available
+from matplotlib.colors import ListedColormap
+from sklearn.datasets import fetch_openml, make_circles
+from sklearn.metrics import accuracy_score, make_scorer, matthews_corrcoef
+from sklearn.model_selection import GridSearchCV, train_test_split
+from xgboost.sklearn import XGBClassifier as SklearnXGBClassifier
+
+from concrete.ml.sklearn import XGBClassifier as ConcreteXGBClassifier
+
+use_gpu_if_available = False
+device = "cuda" if use_gpu_if_available and check_gpu_available() else "cpu"
+
+%matplotlib inline
+
+X, y = make_circles(n_samples=1000, noise=0.1, factor=0.6, random_state=0)
+
+# Define the figure size and color
+plt.figure(figsize=(10, 6))
+cm_bright = ListedColormap(["#FF0000", "#FFFFFF", "#0000FF"])
+
+plt.scatter(X[:, 0], X[:, 1], c=y, s=10, cmap=cm_bright)
+plt.show()
+
+# Define the parameters used for initialization
+n_estimators = 50
+max_depth = 4
+n_bits = 6
+
+# Define the parameters used for training
+fit_extra_param = {"eval_metric": "logloss"}
+
+sklearn_model = SklearnXGBClassifier(n_estimators=n_estimators, max_depth=max_depth)
+sklearn_model.fit(X, y, **fit_extra_param);
+
+concrete_model = ConcreteXGBClassifier(
+    n_bits=n_bits, n_estimators=n_estimators, max_depth=max_depth
+)
+concrete_model.fit(X, y);
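+
+# Quick agreement check (an illustrative sketch): the quantized Concrete ML model can
+# be evaluated in the clear, without any compilation, and compared against the float
+# scikit-learn model on the training points.
+agreement = (concrete_model.predict(X) == sklearn_model.predict(X)).mean()
+print(f"Clear quantized vs float prediction agreement: {agreement * 100:.2f}%")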
+
+def plot_contour(model, X, y, title=""):
+    """Plot the contour lines given a model and a data-set."""
+    # Create a grid with lots of points to plot the contour of the decision function
+    x_min, x_max = X[:, 0].min() - 0.1, X[:, 0].max() + 0.1
+    y_min, y_max = X[:, 1].min() - 0.1, X[:, 1].max() + 0.1
+    grid_x, grid_y = numpy.meshgrid(
+        numpy.arange(x_min, x_max, 0.1), numpy.arange(y_min, y_max, 0.1)
+    )
+
+    # Predict the function value on the grid. For the Concrete ML model, this inference is done in
+    # the clear, which is expected to exactly match the FHE inference.
+    grid_z = model.predict_proba(numpy.c_[grid_x.ravel(), grid_y.ravel()])[:, 1]
+
+    grid_z = grid_z.reshape(grid_x.shape)
+
+    # Define the plot size
+    plt.figure(figsize=(10, 6))
+
+    # Plot the contour and training examples
+    plt.contourf(grid_x, grid_y, grid_z, cmap=cm_bright, alpha=0.2)
+    plt.scatter(X[:, 0], X[:, 1], c=y, s=1, cmap=cm_bright)
+    plt.title(title)
+    plt.show()
+
+plot_contour(sklearn_model, X, y, title="Scikit-Learn XGBoost Classifier")
+
+plot_contour(concrete_model, X, y, title="Concrete ML XGBoost Classifier")
+
+# Load the data-set
+X, y = fetch_openml(name="diabetes", as_frame=False, cache=True, return_X_y=True)
+
+# Replace (binary) target values by integers
+y[y == "tested_positive"] = 1
+y[y == "tested_negative"] = 0
+y = y.astype(numpy.int64)
+
+# Create scorer with the MCC metric
+grid_scorer = make_scorer(matthews_corrcoef, greater_is_better=True)
+
+# Define the number of estimators to consider for the following grid search
+n_estimators = [1, 5, 10, 20] + [20 * i for i in range(2, 11)] + [50 * i for i in range(5, 11)]
+
+param_grid = {
+    "max_depth": [2],
+    "n_estimators": n_estimators,
+}
+
+sklearn_grid_search = GridSearchCV(
+    SklearnXGBClassifier(),
+    param_grid,
+    cv=5,
+    scoring=grid_scorer,
+    error_score="raise",
+    verbose=1,
+)
+
+sklearn_grid_search.fit(X, y, **fit_extra_param);
+
+param_grid = {
+    "n_bits": [6],
+    "max_depth": [2],
+    "n_estimators": n_estimators,
+}
+
+concrete_grid_search = GridSearchCV(
+    ConcreteXGBClassifier(),
+    param_grid,
+    cv=5,
+    scoring=grid_scorer,
+    error_score="raise",
+    verbose=1,
+)
+
+concrete_grid_search.fit(X, y);
+
+# Print the best MCC score for both models
+print(f"Best MCC score for Scikit-Learn: {sklearn_grid_search.best_score_:.2f}")
+print(f"Best MCC score Concrete ML: {concrete_grid_search.best_score_:.2f}")
+
+# Define the figure size
+plt.figure(figsize=(10, 6))
+
+# Plot the mean_test_score of both models along the n_estimators hyperparameter
+plt.plot(
+    concrete_grid_search.cv_results_["param_n_estimators"],
+    concrete_grid_search.cv_results_["mean_test_score"],
+    label="Concrete ML",
+)
+plt.plot(
+    sklearn_grid_search.cv_results_["param_n_estimators"],
+    sklearn_grid_search.cv_results_["mean_test_score"],
+    label="Scikit-Learn",
+)
+plt.xlabel("n_estimators")
+plt.ylabel("MCC")
+plt.legend()
+plt.show()
+
+best_params_sklearn = sklearn_grid_search.best_params_
+print(f"Best parameters found for the Scikit-Learn model: {best_params_sklearn}")
+
+best_params_concrete = concrete_grid_search.best_params_
+print(f"Best parameters found for the Concrete ML model: {best_params_concrete}")
+
+# Define the Concrete ML and Scikit-Learn models
+concrete_model = ConcreteXGBClassifier(**best_params_concrete)
+sklearn_model = SklearnXGBClassifier(**best_params_sklearn)
+
+# Split the data into a train and test set
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
+
+# Fit both models
+concrete_model.fit(X_train, y_train, **fit_extra_param)
+sklearn_model.fit(X_train, y_train, **fit_extra_param);
+
+# Compile the Concrete ML model using the training data
+circuit = concrete_model.compile(X_train, device=device)
+
+print(f"Generating a key for an {circuit.graph.maximum_integer_bit_width()}-bit circuit")
+
+# Generate the key
+time_begin = time.time()
+circuit.client.keygen(force=False)
+print(f"Key generation time: {time.time() - time_begin:.2f} seconds")
+
+# Compute the predictions using the Scikit-Learn model
+y_pred_sklearn = sklearn_model.predict(X_test)
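+
+# Sanity sketch: with the circuit compiled above, clear quantized inference and FHE
+# simulation are expected to agree closely; a quick check before the accuracy report.
+y_pred_clear_quantized = concrete_model.predict(X_test)
+y_pred_simulated_check = concrete_model.predict(X_test, fhe="simulate")
+match_rate = (y_pred_clear_quantized == y_pred_simulated_check).mean()
+print(f"Clear quantized vs simulated agreement: {match_rate * 100:.2f}%")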
+
+# Compute the predictions using the Concrete ML model with FHE simulation
+y_pred_simulated = concrete_model.predict(X_test, fhe="simulate")
+
+print("Accuracy scores:")
+print(
+    f"- Scikit-Learn (clear floating points): {accuracy_score(y_test, y_pred_sklearn)*100:.2f}%\n"
+    f"- Concrete ML (clear quantized): {accuracy_score(y_test, y_pred_simulated)*100:.2f}%\n"
+)
+
+N_SAMPLE_FHE = 10
+
+# Pick N_SAMPLE_FHE random samples from the test set
+idx_test = numpy.random.choice(X_test.shape[0], N_SAMPLE_FHE, replace=False)
+X_test_fhe = X_test[idx_test]
+y_test_fhe = y_test[idx_test]
+
+# Compute the predictions using the Concrete ML (quantized) model in the clear
+y_preds_clear = concrete_model.predict(X_test_fhe)
+
+# Compute the predictions using the Concrete ML model in FHE
+time_begin = time.time()
+y_preds_fhe = concrete_model.predict(X_test_fhe, fhe="execute")
+print(f"FHE execution time: {(time.time() - time_begin) / len(X_test_fhe):.2f} seconds per sample")
+
+# Compare the clear quantized inference vs FHE inference
+print(
+    f"{(y_preds_fhe == y_preds_clear).sum()}/{N_SAMPLE_FHE} "
+    "FHE predictions match the clear quantized predictions"
+)
+
+
+
+# Code from: ./GLMComparison.ipynb
+--------------------------------------------------------------------------------
+
+# Source : https://scikit-learn.org/stable/auto_examples/linear_model/plot_tweedie_regression_insurance_claims.html # noqa # pylint: disable=line-too-long
+
+# Authors: Christian Lorentzen
+# Roman Yurchak
+# Olivier Grisel
+# Modified to integrate Concrete ML functions by Zama
+# License: BSD 3 clause
+
+import sys
+import time
+from collections import defaultdict
+from timeit import default_timer as timer
+
+import numpy as np
+import sklearn
+from sklearn.compose import ColumnTransformer
+from sklearn.datasets import fetch_openml
+from sklearn.linear_model import GammaRegressor as SklearnGammaRegressor
+from sklearn.linear_model import PoissonRegressor as SklearnPoissonRegressor
+from sklearn.linear_model import TweedieRegressor as SklearnTweedieRegressor
+from sklearn.metrics import mean_gamma_deviance, mean_poisson_deviance, mean_tweedie_deviance
+from sklearn.model_selection import train_test_split
+from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import (
+    FunctionTransformer,
+    KBinsDiscretizer,
+    OneHotEncoder,
+    StandardScaler,
+)
+
+from concrete.ml.sklearn import GammaRegressor as ConcreteGammaRegressor
+from concrete.ml.sklearn import PoissonRegressor as ConcretePoissonRegressor
+from concrete.ml.sklearn import TweedieRegressor as ConcreteTweedieRegressor
+
+%matplotlib inline
+
+import matplotlib.pyplot as plt
+from IPython.display import display
+
+# Getting the original data-set containing the risk features
+# Link: https://www.openml.org/d/41214
+risks_data, _ = fetch_openml(
+    data_id=41214, as_frame=True, cache=True, data_home="~/.cache/sklearn", return_X_y=True
+)
+
+# Getting the data-set containing claim amounts
+# Link: https://www.openml.org/d/41215
+claims_data, _ = fetch_openml(
+    data_id=41215, as_frame=True, cache=True, data_home="~/.cache/sklearn", return_X_y=True
+)
+
+# Set IDpol as index
+risks_data["IDpol"] = risks_data["IDpol"].astype(int)
+risks_data.set_index("IDpol", inplace=True)
+
+# Grouping claim amounts together if they are associated with the same policy
+claims_data = claims_data.groupby("IDpol").sum()
+
+# Merging the two sets over policy IDs
+data = risks_data.join(claims_data, how="left")
+
+# Only keeping the first 100 000 rows for faster running time
+data = data.head(100000)
+
+# Replacing unknown claim amounts with 0
+data["ClaimAmount"].fillna(0, inplace=True)
+
+# Filtering out claims with zero amount, as the severity (gamma) model
+# requires strictly positive target values
+data.loc[(data["ClaimAmount"] == 0) & (data["ClaimNb"] >= 1), "ClaimNb"] = 0
+
+# Removing unreasonable outliers
+data["ClaimNb"] = data["ClaimNb"].clip(upper=4)
+data["Exposure"] = data["Exposure"].clip(upper=1)
+data["ClaimAmount"] = data["ClaimAmount"].clip(upper=200000)
+
+sklearn_sparse_arg = (
+    {"sparse": False} if "1.1." in sklearn.__version__ else {"sparse_output": False}
+)
+log_scale_transformer = make_pipeline(FunctionTransformer(np.log, validate=False), StandardScaler())
+
+linear_model_preprocessor = ColumnTransformer(
+    [
+        ("passthrough_numeric", "passthrough", ["BonusMalus"]),
+        ("binned_numeric", KBinsDiscretizer(n_bins=10), ["VehAge", "DrivAge"]),
+        ("log_scaled_numeric", log_scale_transformer, ["Density"]),
+        (
+            "onehot_categorical",
+            OneHotEncoder(**sklearn_sparse_arg),
+            ["VehBrand", "VehPower", "VehGas", "Region", "Area"],
+        ),
+    ],
+    remainder="drop",
+)
+
+x = linear_model_preprocessor.fit_transform(data)
+
+# Creating target values for Poisson
+data["Frequency"] = data["ClaimNb"] / data["Exposure"]
+
+# Creating target values for Gamma
+data["AvgClaimAmount"] = data["ClaimAmount"] / np.fmax(data["ClaimNb"], 1)
+
+# Creating target values for Tweedie
+# Insurance companies are interested in modeling the Pure Premium, that is the expected total
+# claim amount per unit of exposure for each policyholder in their portfolio
+data["PurePremium"] = data["ClaimAmount"] / data["Exposure"]
+
+plt.ioff()
+fig, ax = plt.subplots(1, 3, figsize=(15, 7))
+
+# Set the figure's main parameters
+fig.patch.set_facecolor("white")
+fig.suptitle("Different target values distribution")
+fig.supylabel("Count")
+
+# Frequency of claims distribution
+ax[0].set_title("Poisson")
+ax[0].set_xlabel("Frequency of claims")
+data["Frequency"].hist(bins=30, log=True, ax=ax[0], color="black")
+
+# Average amount of claims distribution
+ax[1].set_title("Gamma")
+ax[1].set_xlabel("Average amount of claims")
+data["AvgClaimAmount"].hist(bins=30, log=True, ax=ax[1], color="blue")
+
+# PurePremium distribution
+ax[2].set_title("Tweedie")
+ax[2].set_xlabel("PurePremium")
+data["PurePremium"].hist(bins=30, log=True, ax=ax[2], color="red")
+
+display(fig)
+
+train_data, test_data, x_train_data, x_test_data = train_test_split(
+    data,
+    x,
+    test_size=0.2,
+    random_state=0,
+)
+_, test_data, _, x_test_data = train_test_split(
+    test_data,
+    x_test_data,
+    test_size=50,
+    random_state=0,
+)
+
+gamma_mask_train = train_data["ClaimAmount"] > 0
+gamma_mask_test = test_data["ClaimAmount"] > 0
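+
+# Metric sketch (illustrative): scikit-learn's Tweedie deviance generalizes both metrics
+# used below, with power=1 recovering the Poisson deviance and power=2 the Gamma
+# deviance, which is why a single Tweedie regressor with power=1.9 sits between the two.
+_y_t, _y_p = np.array([1.0, 2.0, 3.0]), np.array([1.5, 2.5, 2.0])
+assert np.isclose(mean_tweedie_deviance(_y_t, _y_p, power=1), mean_poisson_deviance(_y_t, _y_p))
+assert np.isclose(mean_tweedie_deviance(_y_t, _y_p, power=2), mean_gamma_deviance(_y_t, _y_p))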
"x_test": x_test_data[gamma_mask_test], + "score_parameters": { + "y_true": test_data[gamma_mask_test]["AvgClaimAmount"], + "sample_weight": test_data[gamma_mask_test]["ClaimNb"], + }, + "deviance": mean_gamma_deviance, + }, + "Tweedie": { + "sklearn": SklearnTweedieRegressor, + "concrete": ConcreteTweedieRegressor, + "init_parameters": { + "power": 1.9, + "alpha": 0.1, + "max_iter": 10000, + }, + "fit_parameters": { + "X": x_train_data, + "y": train_data["PurePremium"], + "sample_weight": train_data["Exposure"], + }, + "x_test": x_test_data, + "score_parameters": { + "y_true": test_data["PurePremium"], + "sample_weight": test_data["Exposure"], + "power": 1.9, + }, + "deviance": mean_tweedie_deviance, + }, +} + +def compare_regressors(n_bits, fhe="simulate"): + # pylint: disable=too-many-locals + scores = defaultdict(list) + predictions = defaultdict(list) + + for glm, parameters_glm in parameters_glms.items(): + # Retrieve the regressors + sklearn_class = parameters_glm["sklearn"] + concrete_class = parameters_glm["concrete"] + + # Instantiate the models + init_parameters = parameters_glm["init_parameters"] + sklearn_glm = sklearn_class(**init_parameters) + concrete_glm = concrete_class(n_bits=n_bits, **init_parameters) + + # Fit the models + fit_parameters = parameters_glm["fit_parameters"] + sklearn_glm.fit(**fit_parameters) + concrete_glm.fit(**fit_parameters) + + x_train_subset = fit_parameters["X"][:100] + # Compile the Concrete ML model if it needs to be executed in FHE + if fhe in ["execute", "simulate"]: + circuit = concrete_glm.compile(x_train_subset) + + # Generate the key + print( + "Generating a key for an " + f"{circuit.graph.maximum_integer_bit_width()}-bit circuit" + ) + sys.stdout.flush() + + time_begin = time.time() + circuit.client.keygen(force=False) + print(f"Key generation time: {time.time() - time_begin:.4f} seconds") + + # Compute the predictions using sklearn (floating points, in the clear) + x_test = parameters_glm["x_test"] + sklearn_predictions = sklearn_glm.predict(x_test) + + # Compute the predictions using Concrete ML (quantized, in the clear) + concrete_q_predictions = concrete_glm.predict(x_test) + + # Compute the predictions using Concrete ML (in FHE) + start = timer() + concrete_predictions = concrete_glm.predict( + x_test, + fhe=fhe, + ) + end = timer() + run_time = end - start + + # Compute the deviance scores + mean_deviance = parameters_glm["deviance"] + score_parameters = parameters_glm["score_parameters"] + sklearn_score = mean_deviance(y_pred=sklearn_predictions, **score_parameters) + concrete_q_score = mean_deviance(y_pred=concrete_q_predictions, **score_parameters) + concrete_score = mean_deviance(y_pred=concrete_predictions, **score_parameters) + + # Print the deviance scores + fhe_message = "in FHE" if fhe == "execute" else "in clear" + print(f"Mean {glm} deviance (scikit-learn): {sklearn_score:.4f}") + print(f"Mean {glm} deviance (Concrete ML, quantized): {concrete_q_score:.4f}") + print( + f"Mean {glm} deviance (Concrete ML {fhe_message}, " + f"with {run_time / len(x_test):.4f} seconds " + f"per inference): {concrete_score:.4f}" + ) + + # Measure the error of the FHE quantized model with respect to the clear scikit-learn + # float model + score_difference = abs(concrete_score - sklearn_score) * 100 / sklearn_score + print( + "Relative difference between scikit-learn (clear) and Concrete-ml (FHE) scores:", + f"{score_difference:.2f}%\n", + ) + + # Store the results + scores["sklearn"].append(sklearn_score) + 
scores["concrete"].append(concrete_score) + predictions["sklearn"].append(sklearn_predictions) + predictions["concrete"].append(concrete_predictions) + + return scores, predictions + +n_bits = 11 +fhe = "execute" + +scores, predictions = compare_regressors(n_bits, fhe=fhe) + + + +# Code from: ./ClassifierComparison.ipynb +-------------------------------------------------------------------------------- + +# Source: +# https://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html + +# Code source: Gaël Varoquaux +# Andreas Müller +# Modified for documentation by Jaques Grobler +# Modified to integrate Concrete ML functions by Zama +# License: BSD 3 clause + +import warnings + +warnings.simplefilter(action="ignore", category=FutureWarning) + +from functools import partial + +import torch + +from concrete.ml.sklearn import ( + DecisionTreeClassifier, + LinearSVC, + LogisticRegression, + NeuralNetClassifier, + RandomForestClassifier, + XGBClassifier, +) + +# The simulation mode allows to measure the impact of FHE execution on accuracy +# without paying the cost of FHE computations. +# However, data is not encrypted when using the simulation: the model performs inference +# on clear data. +%run utils/classifier_comparison_utils.py + +params_neural_net = { + "module__n_w_bits": 2, + "module__n_a_bits": 4, + "module__n_accum_bits": 32, + "module__n_hidden_neurons_multiplier": 6, + "module__n_layers": 2, # 1 hidden layer + "module__activation_function": torch.nn.ReLU, + "max_epochs": 400, + "verbose": 0, + "lr": 0.001, +} + +neural_network_classifiers = [ + ( + partial(NeuralNetClassifier, batch_size=32, **params_neural_net), + "Neural Net", + ), +] + +# pylint: disable-next=undefined-variable +make_classifier_comparison("NN Classifiers", neural_network_classifiers, 0.5, simulate=True) # noqa + +linear_classifiers = [ + (partial(LinearSVC, C=0.025), "Linear SVC"), + (LogisticRegression, "Logistic Regression"), +] + +# pylint: disable-next=undefined-variable +make_classifier_comparison("Linear Classifiers", linear_classifiers, 0, simulate=True, h=1) # noqa + +tree_classifiers = [ + (partial(DecisionTreeClassifier, max_depth=5), "Decision Tree"), + (partial(RandomForestClassifier, max_depth=4, n_estimators=5), "Random Forest"), + (partial(XGBClassifier, n_jobs=1, max_depth=4, n_estimators=5), "XGB"), +] + +# pylint: disable-next=undefined-variable +make_classifier_comparison( # noqa + "Tree-Based Classifiers", tree_classifiers, 0.5, simulate=True, h=0.1 +) + + + +# Code from: ./LogisticRegression.ipynb +-------------------------------------------------------------------------------- + +import time + +import numpy as np +from sklearn.datasets import make_classification +from sklearn.linear_model import LogisticRegression as SklearnLogisticRegression +from sklearn.metrics import accuracy_score +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import MinMaxScaler, StandardScaler + +from concrete.ml.sklearn import LogisticRegression as ConcreteLogisticRegression + +%matplotlib inline + +import matplotlib.pyplot as plt +from IPython.display import display + +X, y = make_classification( + n_samples=200, + n_features=2, + n_redundant=0, + n_informative=2, + random_state=2, + n_clusters_per_class=1, +) + +rng = np.random.RandomState(2) +X += 2 * rng.uniform(size=X.shape) + +b_min = np.min(X, axis=0) +b_max = np.max(X, axis=0) + +x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42) + +x_test_grid, y_test_grid = 
+
+x_test_grid, y_test_grid = np.meshgrid(
+    np.linspace(b_min[0], b_max[0], 30), np.linspace(b_min[1], b_max[1], 30)
+)
+x_grid_test = np.vstack([x_test_grid.ravel(), y_test_grid.ravel()]).transpose()
+
+sklearn_logr = SklearnLogisticRegression()
+sklearn_logr.fit(x_train, y_train)
+y_pred_test = sklearn_logr.predict(x_test)
+
+# Compute the scikit-learn classifier's probabilities on the domain
+y_score_grid = sklearn_logr.predict_proba(x_grid_test)[:, 1]
+
+plt.ioff()
+plt.clf()
+fig, ax = plt.subplots(1, figsize=(12, 8))
+fig.patch.set_facecolor("white")
+ax.contourf(x_test_grid, y_test_grid, y_score_grid.reshape(x_test_grid.shape), cmap="coolwarm")
+CS1 = ax.contour(
+    x_test_grid,
+    y_test_grid,
+    y_score_grid.reshape(x_test_grid.shape),
+    levels=[0.5],
+    linewidths=2,
+)
+CS1.collections[0].set_label("Sklearn decision boundary")
+ax.scatter(x_train[:, 0], x_train[:, 1], c=y_train, marker="D", cmap="jet", label="Train data")
+ax.scatter(x_test[:, 0], x_test[:, 1], c=y_test, marker="x", cmap="jet", label="Test data")
+ax.legend(loc="upper right")
+display(fig)
+
+concrete_logr = ConcreteLogisticRegression(n_bits=8)
+concrete_logr.fit(x_train, y_train);
+
+# Predict on the test set
+y_proba_q = concrete_logr.predict_proba(x_test)[:, 1]
+y_pred_q = concrete_logr.predict(x_test)
+
+# Compute the probabilities on the whole domain in order to be able to plot the contours
+y_proba_q_grid = concrete_logr.predict_proba(x_grid_test)[:, 1]
+y_pred_q_grid = concrete_logr.predict(x_grid_test)
+
+fhe_circuit = concrete_logr.compile(x_train)
+
+print(f"Generating a key for an {fhe_circuit.graph.maximum_integer_bit_width()}-bit circuit")
+
+time_begin = time.time()
+fhe_circuit.client.keygen(force=False)
+print(f"Key generation time: {time.time() - time_begin:.4f} seconds")
+
+time_begin = time.time()
+y_pred_fhe = concrete_logr.predict(x_test, fhe="execute")
+print(f"Execution time: {(time.time() - time_begin) / len(x_test):.4f} seconds per sample")
+
+sklearn_accuracy = accuracy_score(y_test, y_pred_test)
+quantized_accuracy = accuracy_score(y_test, y_pred_q)
+fhe_accuracy = accuracy_score(y_test, y_pred_fhe)
+
+print(f"Sklearn accuracy: {sklearn_accuracy:.4f}")
+print(f"Quantized Clear Accuracy: {quantized_accuracy:.4f}")
+print(f"FHE Accuracy: {fhe_accuracy:.4f}")
+
+# Measure the absolute accuracy difference between the FHE model and the clear quantized model
+concrete_score_difference = abs(fhe_accuracy - quantized_accuracy) * 100
+print(
+    "\nAbsolute difference between Concrete ML (quantized clear) and Concrete ML (FHE) accuracies:",
+    f"{concrete_score_difference:.2f}%",
+)
+
+# Measure the absolute accuracy difference between the FHE model and the clear scikit-learn
+# float model
+score_difference = abs(fhe_accuracy - sklearn_accuracy) * 100
+print(
+    "Absolute difference between scikit-learn (clear) and Concrete ML (FHE) accuracies:",
+    f"{score_difference:.2f}%",
+)
+
+plt.clf()
+fig, ax = plt.subplots(1, figsize=(12, 8))
+fig.patch.set_facecolor("white")
+ax.contourf(x_test_grid, y_test_grid, y_proba_q_grid.reshape(x_test_grid.shape), cmap="coolwarm")
+CS1 = ax.contour(
+    x_test_grid,
+    y_test_grid,
+    y_proba_q_grid.reshape(x_test_grid.shape),
+    levels=[0.5],
+    linewidths=2,
+)
+ax.scatter(x_train[:, 0], x_train[:, 1], c=y_train, cmap="jet", marker="D")
+ax.scatter(x_test[:, 0], x_test[:, 1], c=y_pred_q, cmap="jet", marker="x")
+CS2 = ax.contour(
+    x_test_grid,
+    y_test_grid,
+    y_score_grid.reshape(x_test_grid.shape),
+    levels=[0.5],
+    linewidths=2,
+    linestyles="dashed",
+    cmap="hot",
+)
+ax.clabel(CS1, CS1.levels, inline=True, fontsize=10)
+ax.clabel(CS2, CS2.levels, inline=True, fontsize=10) +CS1.collections[0].set_label(f"FHE decision boundary, acc={fhe_accuracy:.2f}") +CS2.collections[0].set_label(f"Sklearn decision boundary, acc={sklearn_accuracy:.2f}") +ax.legend(loc="upper right") +display(fig) + +from utils.scaling_comparison_utils import plot_data + +scaler = MinMaxScaler((-1, 1)) +x_train_scaled = scaler.fit_transform(x_train) +x_test_scaled = scaler.transform(x_test) + +scaler = StandardScaler() +x_train_normalized = scaler.fit_transform(x_train) +x_test_normalized = scaler.transform(x_test) + +x_train_unscaled = x_train_scaled.copy() +x_train_unscaled[:, 0] *= 100 + +x_test_unscaled = x_test_scaled.copy() +x_test_unscaled[:, 0] *= 100 + +x_train_shifted = x_train_scaled.copy() +x_train_shifted[:, 0] += 100 + +x_test_shifted = x_test_scaled.copy() +x_test_shifted[:, 0] += 100 + +n_bits = 12 +random_state = 0 + +fig, axes = plt.subplots(ncols=2, nrows=5, figsize=(8 * 3, 8 * 4)) +models = [ConcreteLogisticRegression(n_bits=n_bits, random_state=random_state) for _ in range(5)] +features_trains = [x_train, x_train_scaled, x_train_normalized, x_train_unscaled, x_train_shifted] +targets_trains = [y_train, y_train, y_train, y_train, y_train] +features_tests = [x_test, x_test_scaled, x_test_normalized, x_test_unscaled, x_test_shifted] +targets_tests = [y_test, y_test, y_test, y_test, y_test] +names = ["unchanged", "min-max-transformed", "normalized", "unscaled", "shifted"] + +for ax, model, features_train, targets_train, features_test, targets_test, name in zip( + axes, + models, + features_trains, + targets_trains, + features_tests, + targets_tests, + names, +): + plot_data(ax, features_train, targets_train, features_test, targets_test, model, name, h=1) +display(fig) + + + +# Code from: ./LoraMLP.ipynb +-------------------------------------------------------------------------------- + +import shutil +from pathlib import Path + +import matplotlib.pyplot as plt +import numpy as np +import torch +from peft import LoraConfig, get_peft_model +from sklearn.datasets import make_circles, make_moons +from torch import nn, optim +from torch.utils.data import DataLoader, TensorDataset + +from concrete.ml.torch.lora import LoraTrainer + +# Set random seed for reproducibility +SEED = 42 +np.random.seed(SEED) +torch.manual_seed(SEED) + +# Task 1: Two interleaving half circles (make_moons) +X_task1, y_task1 = make_moons(n_samples=500, noise=0.1) +# Task 2: Two concentric circles +X_task2, y_task2 = make_circles(n_samples=500, noise=0.2, factor=0.5) + + +def plot_datasets_and_boundaries(X_task1, y_task1, X_task2, y_task2, model=None, titles=None): + _, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6)) + + if titles is None: + titles = ["Task 1 Dataset", "Task 2 Dataset"] + + for ax, X, y, title in zip([ax1, ax2], [X_task1, X_task2], [y_task1, y_task2], titles): + ax.scatter(X[:, 0], X[:, 1], c=y, cmap="viridis", edgecolor="k") + ax.set_title(title) + + if model is not None: + x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5 + y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5 + h = 0.1 # step size in the mesh + xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h)) + grid = torch.FloatTensor(np.c_[xx.ravel(), yy.ravel()]) + + with torch.no_grad(): + Z = model(grid) + probabilities = torch.softmax(Z, dim=1) + Z = probabilities[:, 1].numpy().reshape(xx.shape) + + ax.contourf(xx, yy, Z, cmap="viridis", alpha=0.3) + + plt.tight_layout() + plt.show() + + +# Plot datasets +plot_datasets_and_boundaries(X_task1, 
y_task1, X_task2, y_task2) + +# Convert datasets to PyTorch tensors +X_task1 = torch.FloatTensor(X_task1) +y_task1 = torch.LongTensor(y_task1) +X_task2 = torch.FloatTensor(X_task2) +y_task2 = torch.LongTensor(y_task2) + +# Create DataLoaders +batch_size = 32 +train_loader_task1 = DataLoader( + TensorDataset(X_task1, y_task1), batch_size=batch_size, shuffle=True +) +train_loader_task2 = DataLoader( + TensorDataset(X_task2, y_task2), batch_size=batch_size, shuffle=True +) + +# Define an MLP model without LoRA layers + + +class SimpleMLP(nn.Module): + """Simple MLP model without LoRA layers.""" + + def __init__(self, input_size=2, hidden_size=128, num_classes=2): + super().__init__() + self.fc1 = nn.Linear(input_size, hidden_size) + self.relu = nn.ReLU() + self.fc2 = nn.Linear(hidden_size, num_classes) + + def forward(self, x): + """Forward pass of the MLP.""" + out = self.fc1(x) + out = self.relu(out) + out = self.fc2(out) + return out + + +# Instantiate the model +model = SimpleMLP() + +# Training loop for Task 1 + + +def train_model(model, train_loader, num_epochs=100): + """Train the model. + + Args: + model (nn.Module): The model to train. + train_loader (DataLoader): DataLoader for training data. + num_epochs (int): Number of epochs to train. + """ + device = torch.device("cpu") + model.to(device) + model.train() + + criterion = nn.CrossEntropyLoss() + optimizer = optim.Adam(model.parameters(), lr=0.01) + + for epoch in range(num_epochs): + total_loss = 0 + for x_batch, y_batch in train_loader: + x_batch = x_batch.to(device) + y_batch = y_batch.to(device) + + optimizer.zero_grad() + outputs = model(x_batch) + loss = criterion(outputs, y_batch) + loss.backward() + optimizer.step() + + total_loss += loss.item() + + # Print loss every 20 epochs + if (epoch + 1) % 20 == 0: + print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}") + + +# Train the model on Task 1 +print("Training on Task 1 without LoRA:") +train_model(model, train_loader_task1, num_epochs=20) + +# Plot datasets with decision boundaries +plot_datasets_and_boundaries( + X_task1.numpy(), + y_task1.numpy(), + X_task2.numpy(), + y_task2.numpy(), + model=model, + titles=["Task 1 after Training", "Task 2 after Training"], +) + +# Apply LoRA to the model using peft +lora_config = LoraConfig( + r=1, lora_alpha=1, lora_dropout=0.01, target_modules=["fc1", "fc2"], bias="none" +) + +peft_model = get_peft_model(model, lora_config) + +# Update training parameters, including loss function +optimizer = optim.Adam(filter(lambda p: p.requires_grad, peft_model.parameters()), lr=0.01) +loss_fn = nn.CrossEntropyLoss() +training_args = {"gradient_accumulation_steps": 1} + +# Set up LoRA training +lora_trainer = LoraTrainer( + peft_model, optimizer=optimizer, loss_fn=loss_fn, training_args=training_args +) + +# Prepare input data for calibration +batch_size_per_task = batch_size // 2 +inputset = ( + torch.cat([X_task1[:batch_size_per_task], X_task2[:batch_size_per_task]]), + torch.cat([y_task1[:batch_size_per_task], y_task2[:batch_size_per_task]]), +) + +# Compile the model +lora_trainer.compile(inputset, n_bits=8) + +# Fine-tune the model on Task 2 using LoRA +lora_trainer.train(train_loader_task2, num_epochs=10, fhe="execute") + +# Enable LoRA adapters (already enabled by default) +peft_model.enable_adapter_layers() + +# Plot datasets with decision boundaries after fine-tuning +plot_datasets_and_boundaries( + X_task1.numpy(), + y_task1.numpy(), + X_task2.numpy(), + y_task2.numpy(), + model=peft_model, + titles=["Task 1 
after Fine-tuning", "Task 2 after Fine-tuning"],
+)
+
+# Disable LoRA adapters
+peft_model.disable_adapter_layers()
+
+# Plot datasets with decision boundaries with the LoRA adapters disabled
+plot_datasets_and_boundaries(
+    X_task1.numpy(),
+    y_task1.numpy(),
+    X_task2.numpy(),
+    y_task2.numpy(),
+    model=peft_model,
+    titles=["Task 1 with LoRA disabled", "Task 2 with LoRA disabled"],
+)
+
+# Enable LoRA adapters (already enabled by default)
+peft_model.enable_adapter_layers()
+
+# Print trainable (lora) parameters
+peft_model.print_trainable_parameters()
+
+# Save the model and remove all layers that will be done on the server
+path = Path("lora_mlp")
+
+if path.is_dir() and any(path.iterdir()):
+    shutil.rmtree(path)
+
+lora_trainer.save_and_clear_private_info(path)
+
+# At this point, the hybrid_model only contains the trainable parameters of the LoRA layers.
+peft_model.print_trainable_parameters()
+
+
+
+# Code from: ./ImportingFromScikitLearn.ipynb
+--------------------------------------------------------------------------------
+
+from functools import partial
+
+# The simulation mode allows measuring the impact of FHE execution on accuracy
+# without paying the cost of FHE computations.
+# However, data is not encrypted when using the simulation: the model performs inference
+# on clear data.
+
+
+def make_classifier_comparison_from_sklearn(*args, **kwargs):
+    return args, kwargs
+
+
+%run utils/classifier_comparison_utils.py
+
+from concrete.ml.sklearn import (
+    DecisionTreeClassifier,
+    LinearSVC,
+    LogisticRegression,
+    RandomForestClassifier,
+    XGBClassifier,
+)
+
+%%time
+
+linear_classifiers = [
+    (partial(LinearSVC, C=0.025), "Linear SVC"),
+    (LogisticRegression, "Logistic Regression"),
+]
+
+# pylint: disable-next=undefined-variable
+make_classifier_comparison_from_sklearn(
+    "Linear Classifiers", linear_classifiers, 0, simulate=True, h=1
+) # noqa
+
+%%time
+
+tree_classifiers = [
+    (partial(DecisionTreeClassifier, max_depth=5), "Decision Tree"),
+    (partial(RandomForestClassifier, max_depth=4, n_estimators=5), "Random Forest"),
+    (partial(XGBClassifier, n_jobs=1, max_depth=4, n_estimators=5), "XGB"),
+]
+
+# pylint: disable-next=undefined-variable
+make_classifier_comparison_from_sklearn( # noqa
+    "Tree-Based Classifiers", tree_classifiers, 0.5, simulate=True, h=0.1
+)
+
+
+
+# Code from: ./DecisionTreeRegressor.ipynb
+--------------------------------------------------------------------------------
+
+import sys
+import time
+
+import numpy
+from sklearn.datasets import fetch_california_housing
+from sklearn.linear_model import LinearRegression
+from sklearn.metrics import mean_absolute_error
+from sklearn.model_selection import train_test_split
+from sklearn.utils import resample
+
+import concrete.ml
+from concrete.ml.sklearn import DecisionTreeRegressor as ConcreteDecisionTreeRegressor
+
+print(f"Using Concrete ML version {concrete.ml.version.__version__}")
+print(f"With Python version {sys.version}")
+
+features_all, target_all = fetch_california_housing(return_X_y=True)
+features, target = resample(features_all, target_all, replace=True, n_samples=6000, random_state=42)
+
+# Split data in train-test groups
+x_train, x_test, y_train, y_test = train_test_split(
+    features,
+    target,
+    test_size=0.15,
+    random_state=42,
+)
+
+%matplotlib inline
+import matplotlib.pyplot as plt
+
+plt.hist(target, bins=15, density=True)
+plt.show()
+
+# Utility functions
+
+
+def print_as_dollars(x):
+    """Return the value formatted in dollars (the target unit is 100,000$)."""
+    return f"{x * 10**5:.2f}$"
+
+
+def print_compare_to_baseline(x, baseline_error):
+    """Return the error relative to the baseline error, in percent."""
+    return f"{(x - baseline_error) / baseline_error * 100 :.2f}% of baseline"
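+
+# Usage sketch for the helpers above (illustrative values): an error of 0.5 corresponds
+# to 50,000$, and an error equal to the baseline is 0% of it.
+assert print_as_dollars(0.5) == "50000.00$"
+assert print_compare_to_baseline(1.0, 1.0) == "0.00% of baseline"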
+
+
+mean_error = mean_absolute_error(y_test, numpy.repeat([numpy.median(y_test)], y_test.shape))
+print(f"Mean Absolute Overall Error : {print_as_dollars(mean_error)}")
+
+canary = LinearRegression()
+canary.fit(x_train[:, :1], y_train)
+baseline_error = mean_absolute_error(canary.predict(x_test[:, :1]), y_test)
+print(f"Baseline Mean Error : {print_as_dollars(baseline_error)}")
+
+default_model = ConcreteDecisionTreeRegressor(criterion="absolute_error", n_bits=6, random_state=42)
+
+begin = time.time()
+default_model.fit(x_train, y_train)
+print(f"Training on {x_train.shape[0]} samples in {(time.time() - begin):.4f} seconds")
+
+default_error = mean_absolute_error(default_model.predict(x_test), y_test)
+print(
+    f"Default Model Mean Error: {print_as_dollars(default_error)},"
+    f"{print_compare_to_baseline(default_error, baseline_error)}"
+)
+
+# Find best hyper parameters with cross validation
+from sklearn.model_selection import GridSearchCV
+
+# List of hyper parameters to tune
+param_grid = {
+    "criterion": ["absolute_error"],
+    "random_state": [42],
+    "max_depth": [10],
+    "n_bits": [6, 7],
+    "max_features": [2, 5],
+    "min_samples_leaf": [2, 5],
+    "min_samples_split": [2, 10],
+}
+
+grid_search = GridSearchCV(
+    ConcreteDecisionTreeRegressor(),
+    param_grid,
+    cv=3,
+    scoring="neg_mean_absolute_error",
+    error_score="raise",
+    n_jobs=1,
+)
+
+gs_results = grid_search.fit(x_train, y_train)
+print("Best hyper parameters:", gs_results.best_params_)
+print(f"Min loss: {print_as_dollars(-gs_results.best_score_)}")
+
+# We fix all parameters as the best ones, except for n_bits.
+best = gs_results.best_params_
+cv_errors = [
+    {"n_bits": params["n_bits"], "score": score}
+    for params, score in zip(
+        gs_results.cv_results_["params"], gs_results.cv_results_["mean_test_score"]
+    )
+    if (params["max_depth"] == best["max_depth"])
+    and (params["max_features"] == best["max_features"]) # noqa: W503
+    and (params["min_samples_leaf"] == best["min_samples_leaf"]) # noqa: W503
+    and (params["min_samples_split"] == best["min_samples_split"]) # noqa: W503
+]
+for el in cv_errors:
+    print(f"Error for n_bits={el['n_bits']} is {print_as_dollars(-el['score'])}")
+
+# Build the model with best hyper parameters
+model = ConcreteDecisionTreeRegressor(
+    max_depth=gs_results.best_params_["max_depth"],
+    max_features=gs_results.best_params_["max_features"],
+    min_samples_leaf=gs_results.best_params_["min_samples_leaf"],
+    min_samples_split=gs_results.best_params_["min_samples_split"],
+    n_bits=6,
+    random_state=42,
+)
+
+model, sklearn_model = model.fit_benchmark(x_train, y_train)
+
+# Compute the mean absolute error on the test set
+y_pred_concrete = model.predict(x_test)
+y_pred_sklearn = sklearn_model.predict(x_test)
+concrete_average_precision = mean_absolute_error(y_test, y_pred_concrete)
+sklearn_average_precision = mean_absolute_error(y_test, y_pred_sklearn)
+print(
+    f"Sklearn Mean Error: {print_as_dollars(sklearn_average_precision)},"
+    f"{print_compare_to_baseline(sklearn_average_precision, baseline_error)}"
+)
+print(
+    f"Concrete Mean Error: {print_as_dollars(concrete_average_precision)},"
+    f"{print_compare_to_baseline(concrete_average_precision, baseline_error)}"
+)
+
+from concrete.compiler import check_gpu_available
+
+use_gpu_if_available = False
+device = "cuda" if use_gpu_if_available and check_gpu_available() else "cpu"
+
+x_train_subset = x_train[:500]
+
+begin = 
time.time() +circuit = model.compile(x_train_subset, device=device) +print(f"Compiled with {len(x_train_subset)} samples in {(time.time() - begin):.4f} seconds") + +print(f"Generating a key for an {circuit.graph.maximum_integer_bit_width()}-bit circuit") +time_begin = time.time() +circuit.client.keygen(force=False) +print(f"Key generation time: {time.time() - time_begin:.2f} seconds") + +FHE_SAMPLES = 3 +x_test_small = x_test[:FHE_SAMPLES] +y_pred = y_test[:FHE_SAMPLES] + +# Predict in FHE for a few examples +time_begin = time.time() +y_pred_fhe = model.predict(x_test_small, fhe="execute") +print(f"Execution time: {(time.time() - time_begin) / FHE_SAMPLES:.2f} seconds per sample") + +# Check prediction FHE vs sklearn +print("Cipher estimates:") +print(f"{', '.join(f'{print_as_dollars(x)}' for x in y_pred_fhe)}") +print("Plain estimates:") +print(f"{', '.join(f'{print_as_dollars(x)}' for x in y_pred)}") +print("Differences:") +print(f"{', '.join(f'{print_as_dollars(x)}' for x in (y_pred_fhe - y_pred))}") + +# Concatenate all the steps in one function of n_bits + + +def evaluate(n_bits): + model = ConcreteDecisionTreeRegressor( + max_depth=gs_results.best_params_["max_depth"], + max_features=gs_results.best_params_["max_features"], + min_samples_leaf=gs_results.best_params_["min_samples_leaf"], + min_samples_split=gs_results.best_params_["min_samples_split"], + n_bits=n_bits, + random_state=42, + ) + + model, sklearn_model = model.fit_benchmark(x_train, y_train) + + y_pred_concrete = model.predict(x_test) + y_pred_sklearn = sklearn_model.predict(x_test) + + concrete_average_precision = mean_absolute_error(y_test, y_pred_concrete) + sklearn_average_precision = mean_absolute_error(y_test, y_pred_sklearn) + + print( + f"Sklearn Mean Error: {print_as_dollars(sklearn_average_precision)}," + f"{print_compare_to_baseline(sklearn_average_precision, baseline_error)}" + ) + print( + f"Concrete Mean Error: {print_as_dollars(concrete_average_precision)}," + f"{print_compare_to_baseline(concrete_average_precision, baseline_error)}" + ) + + x_train_subset = x_train[:500] + begin = time.time() + circuit = model.compile(x_train_subset) + print( + f"Circuit compiled with {len(x_train_subset)} samples in {(time.time() - begin):.4f} " + "seconds" + ) + print(f"Generating a key for an {circuit.graph.maximum_integer_bit_width()}-bit circuit") + + time_begin = time.time() + circuit.client.keygen(force=False) + print(f"Key generation time: {time.time() - time_begin:.2f} seconds") + + time_begin = time.time() + model.predict(x_test_small, fhe="execute") + print(f"Execution time: {(time.time() - time_begin) / FHE_SAMPLES:.2f} seconds per sample") + + +for n_bits in [6, 7]: + header = f"N_BITS = {n_bits}" + print(header) + print("-" * len(header)) + evaluate(n_bits) + print() + + + +# Code from: ./DecisionTreeClassifier.ipynb +-------------------------------------------------------------------------------- + +import time + +import numpy +from sklearn.datasets import fetch_openml +from sklearn.model_selection import train_test_split + +features, classes = fetch_openml(data_id=44, as_frame=False, cache=True, return_X_y=True) +classes = classes.astype(numpy.int64) + +x_train, x_test, y_train, y_test = train_test_split( + features, + classes, + test_size=0.15, + random_state=42, +) + +# Find best hyper parameters with cross validation +from sklearn.model_selection import GridSearchCV + +from concrete.ml.sklearn import DecisionTreeClassifier as ConcreteDecisionTreeClassifier + +# List of hyper parameters to tune 
+param_grid = { + "max_features": [None, "auto", "sqrt", "log2"], + "min_samples_leaf": [1, 10, 100], + "min_samples_split": [2, 10, 100], + "max_depth": [None, 2, 4, 6, 8], +} + +grid_search = GridSearchCV( + ConcreteDecisionTreeClassifier(), + param_grid, + cv=10, + scoring="average_precision", + error_score="raise", + n_jobs=1, +) + +gs_results = grid_search.fit(x_train, y_train) +print("Best hyper parameters:", gs_results.best_params_) +print("Best score:", gs_results.best_score_) + +# Build the model with best hyper parameters +model = ConcreteDecisionTreeClassifier( + max_features=gs_results.best_params_["max_features"], + min_samples_leaf=gs_results.best_params_["min_samples_leaf"], + min_samples_split=gs_results.best_params_["min_samples_split"], + max_depth=gs_results.best_params_["max_depth"], + n_bits=6, +) + +model, sklearn_model = model.fit_benchmark(x_train, y_train) + +# Compute average precision on test +from sklearn.metrics import average_precision_score + +# pylint: disable=no-member +y_pred_concrete = model.predict_proba(x_test)[:, 1] +y_pred_sklearn = sklearn_model.predict_proba(x_test)[:, 1] +concrete_average_precision = average_precision_score(y_test, y_pred_concrete) +sklearn_average_precision = average_precision_score(y_test, y_pred_sklearn) +print(f"Sklearn average precision score: {sklearn_average_precision:0.2f}") +print(f"Concrete average precision score: {concrete_average_precision:0.2f}") + +# Show the confusion matrix on x_test +from sklearn.metrics import confusion_matrix + +y_pred = model.predict(x_test) +true_negative, false_positive, false_negative, true_positive = confusion_matrix( + y_test, y_pred, normalize="true" +).ravel() + +num_samples = len(y_test) +num_spam = sum(y_test) + +print(f"Number of test samples: {num_samples}") +print(f"Number of spams in test samples: {num_spam}") + +print(f"True Negative (legit mail well classified) rate: {true_negative}") +print(f"False Positive (legit mail classified as spam) rate: {false_positive}") +print(f"False Negative (spam mail classified as legit) rate: {false_negative}") +print(f"True Positive (spam well classified) rate: {true_positive}") + +from concrete.compiler import check_gpu_available + +use_gpu_if_available = False +device = "cuda" if use_gpu_if_available and check_gpu_available() else "cpu" + +# We first compile the model with some data, here the training set +circuit = model.compile(x_train, device=device) + +print(f"Generating a key for an {circuit.graph.maximum_integer_bit_width()}-bit circuit") + +time_begin = time.time() +circuit.client.keygen(force=False) +print(f"Key generation time: {time.time() - time_begin:.2f} seconds") + +# Reduce the sample size for a faster total execution time +FHE_SAMPLES = 10 +x_test = x_test[:FHE_SAMPLES] +y_pred = y_pred[:FHE_SAMPLES] +y_reference = y_test[:FHE_SAMPLES] + +# Predict in FHE for a few examples +time_begin = time.time() +y_pred_fhe = model.predict(x_test, fhe="execute") +print(f"Execution time: {(time.time() - time_begin) / len(x_test):.2f} seconds per sample") + +# Check prediction FHE vs sklearn +print(f"Ground truth: {y_reference}") +print(f"Prediction sklearn: {y_pred}") +print(f"Prediction FHE: {y_pred_fhe}") + +print( + f"{numpy.sum(y_pred_fhe == y_pred)}/" + "10 predictions are similar between the FHE model and the clear sklearn model." 
+)
+
+
+
+# Code from: ./RegressorComparison.ipynb
+--------------------------------------------------------------------------------
+
+import warnings
+
+warnings.simplefilter(action="ignore", category=FutureWarning)
+
+
+import time
+from functools import partial
+
+import matplotlib.pyplot as plt
+import numpy as np
+import torch
+from sklearn.metrics import r2_score
+from sklearn.model_selection import train_test_split
+from sklearn.neural_network import MLPRegressor
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import PolynomialFeatures, StandardScaler
+
+from concrete.ml.sklearn import (
+    DecisionTreeRegressor,
+    LinearRegression,
+    LinearSVR,
+    NeuralNetRegressor,
+    RandomForestRegressor,
+    XGBRegressor,
+)
+
+%matplotlib inline
+
+rng = np.random.RandomState(42)
+
+def make_regression_data(
+    n_samples=200,
+    n_features=1,
+    bias=0.0,
+    noise_scale=1.0,
+    loc=0.0,
+    scale=1.0,
+    polynomial_exp=1,
+    target_scale=1.0,
+    feature_scale=1.0,
+):
+    """
+    Generates a dataset for regression models.
+    """
+    X = rng.randn(n_samples, n_features)
+    # To avoid having too large numbers on polynomial data-sets
+    if polynomial_exp > 1:
+        feature_scale = 1
+    X = feature_scale * np.sort(X, 0)
+    scale = scale * polynomial_exp
+    noise = noise_scale * rng.normal(loc=loc, scale=scale, size=n_samples)
+    y = X.ravel() ** polynomial_exp + bias + noise
+    y *= target_scale
+    return X, y
+
+# pylint: disable=too-many-locals,too-many-statements
+
+
+def make_regressor_comparison(title, regressors, **kwargs):
+    print(title)
+
+    # Create subplots where each column represents a polynomial degree
+    subplot_col = kwargs.get("polynomial_exp", 1)
+    fig, axs = plt.subplots(len(regressors), subplot_col, figsize=(15, 8), sharex=False)
+
+    # Create data-sets for each polynomial degree
+    for i in range(subplot_col):
+        kwargs_copy = kwargs.copy()
+        kwargs_copy["polynomial_exp"] = i + 1
+        X, y = make_regression_data(**kwargs_copy)
+
+        # Split the data into training and test sets
+        # Use 30 percent (60 points for a data-set of 200 points) for prediction
+        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
+
+        sort_test_index = np.argsort(X_test.ravel())
+        X_test = X_test[sort_test_index, :]
+        y_test = y_test[sort_test_index]
+
+        # Feature preprocessing
+        # Linear models require polynomial features to be applied before training in order to
+        # fit a non-linear function, and other models also perform better with this transformation
+        pipe = Pipeline(
+            [
+                ("poly", PolynomialFeatures(i + 1)),
+                ("scaler", StandardScaler()),
+            ]
+        )
+
+        X_poly_train = pipe.fit_transform(X_train)
+        X_poly_test = pipe.transform(X_test)
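+
+        # Dimension sketch: with a single input feature, PolynomialFeatures(d) produces
+        # d + 1 columns (1, x, ..., x**d); for degree i + 1 this gives i + 2 columns,
+        # which is why the neural network input dimension is set to i + 2 further below.
+        assert X_poly_train.shape[1] == i + 2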
+
+        # Iterate over the given regressors
+        for j, (regressor, model_name) in enumerate(regressors):
+            print(f"Evaluation of {model_name}")
+            if np.ndim(axs) > 1:
+                axs[0, i].set_title(f"Polynomial degree {i + 1}")
+                ax = axs[j, i]
+            else:
+                try:
+                    axs[i].set_title(f"Polynomial degree {i + 1}")
+                    ax = axs[i]
+                except IndexError:
+                    ax = axs
+                    ax.set_title(f"Polynomial degree {i + 1}")
+
+            # Plot the training points
+            ax.scatter(
+                X_train,
+                y_train,
+                edgecolors="k",
+                label="Train data",
+            )
+
+            # Plot the testing points
+            ax.scatter(
+                X_test,
+                y_test,
+                marker="D",
+                alpha=0.6,
+                edgecolors="k",
+                label="Test data",
+            )
+
+            # Instantiate the model
+            model = regressor()
+
+            # Train the model and retrieve both the Concrete ML model and its equivalent one from
+            # scikit-learn
+            # If the model is a NeuralNetRegressor, instantiate a scikit-learn MLPRegressor
+            # separately in order to be able to compare the results with a float model
+            # that doesn't use QAT
+            if model.__class__ == NeuralNetRegressor:
+
+                sklearn_model = MLPRegressor(
+                    alpha=1,
+                    activation="identity",
+                    max_iter=1000,
+                    hidden_layer_sizes=(25,),
+                    learning_rate_init=0.005,
+                )
+                sklearn_model.fit(X_poly_train, y_train)
+
+                # When applying PolynomialFeatures, the input dimension equals the polynomial
+                # degree + 1
+                model.module__input_dim = i + 2
+                concrete_model = model.fit(X_poly_train, y_train.reshape(-1, 1))
+
+            else:
+
+                concrete_model, sklearn_model = model.fit_benchmark(X_poly_train, y_train)
+
+            # Compute the predictions in clear using the scikit-learn model
+            sklearn_y_pred = sklearn_model.predict(X_poly_test)
+
+            # Compile the Concrete ML model
+            circuit = concrete_model.compile(X_poly_train)
+
+            print(
+                "Generating a key for a " f"{circuit.graph.maximum_integer_bit_width()}-bit circuit"
+            )
+
+            time_begin = time.time()
+            circuit.client.keygen(force=False)
+            time_end = time.time()
+            print(f"Key generation time: {time_end - time_begin:.2f} seconds")
+
+            # Compute the predictions in FHE using the Concrete ML model
+            time_begin = time.time()
+            concrete_y_pred = concrete_model.predict(X_poly_test[:1], fhe="execute")
+            time_end = time.time()
+
+            print(f"Execution time: {(time_end - time_begin):.2f} " "seconds per sample in FHE")
+
+            # Compute predictions for all test examples with the simulate mode
+            concrete_y_pred = concrete_model.predict(X_poly_test, fhe="simulate")
+
+            # Measure the R2 score
+            sklearn_score = r2_score(y_test, sklearn_y_pred)
+            concrete_score = r2_score(y_test, concrete_y_pred)
+
+            is_a_tree_based_model = concrete_model.__class__ in [
+                DecisionTreeRegressor,
+                RandomForestRegressor,
+                XGBRegressor,
+            ]
+
+            # If the model is not a tree-based model, retrieve the maximum integer bitwidth
+            # reached within its circuit.
+            bitwidth = None
+            if not is_a_tree_based_model:
+                bitwidth = circuit.graph.maximum_integer_bit_width()
+
+            # Plot the Concrete ML predictions
+            ax.plot(X_test, concrete_y_pred, c="blue", linewidth=2.5, label="Concrete ML")
+
+            # Plot the scikit-learn predictions
+            ax.plot(X_test, sklearn_y_pred, c="red", linewidth=2.5, label="scikit-learn")
+
+            ax.text(
+                0.5,
+                0.80,
+                f"Concrete ML R2: {concrete_score:.2f}\n scikit-learn R2: {sklearn_score:.2f}\n",
+                transform=ax.transAxes,
+                fontsize=12,
+                va="top",
+                ha="right",
+            )
+            if bitwidth:
+                ax.text(
+                    0.75,
+                    0.1,
+                    f"bitwidth={bitwidth}",
+                    transform=ax.transAxes,
+                    fontsize=12,
+                    va="bottom",
+                    ha="left",
+                )
+            handles, labels = ax.get_legend_handles_labels()
+            fig.legend(handles, labels, loc="upper left")
+
+            scaler = 0.5
+            if len(regressors) == 3:
+                scaler = 0.3
+            fig.text(
+                -0.05, 0.75 - j * scaler, f"{model_name}", ha="center", va="bottom", fontsize=14
+            )
+
+    plt.tight_layout(pad=1.2)
+    plt.show()
+
+params_neural_net = {
+    "module__n_w_bits": 6,
+    "module__n_a_bits": 8,
+    "module__n_accum_bits": 16,
+    "module__n_hidden_neurons_multiplier": 10,
+    "module__n_layers": 2, # 1 hidden layer
+    "module__activation_function": torch.nn.Identity,
+    "max_epochs": 400,
+    "verbose": 0,
+    "lr": 0.1,
+}
+
+
+neural_network_regressor = [
+    (
+        partial(NeuralNetRegressor, batch_size=32, **params_neural_net),
+        "Neural Net",
+    ),
+]
+make_regressor_comparison(
+    "NN Regressors",
+    neural_network_regressor,
+    n_samples=250,
+    polynomial_exp=3,
+    bias=20,
+    scale=0.25,
+    target_scale=1,
+    feature_scale=10,
+)
+
+np.random.seed(42)
+linear_regressor = [
+    (partial(LinearSVR, n_bits={"op_inputs": 5, "op_weights": 2}, C=0.5), "Linear SVR"),
+    (partial(LinearRegression, n_bits={"op_inputs": 5, "op_weights": 2}), "Linear Regression"),
+]
+make_regressor_comparison(
+    "linear",
+    linear_regressor,
+    polynomial_exp=3,
+    bias=20,
+    scale=0.25,
+    target_scale=1,
+    feature_scale=10,
+)
+
+tree_regressors = [
+    (partial(DecisionTreeRegressor, n_bits=5, max_depth=5), "Decision Tree"),
+    (partial(RandomForestRegressor, n_bits=5), "RandomForestRegressor"),
+    (
+        partial(XGBRegressor, n_bits=6, n_estimators=50, max_depth=3, gamma=1, learning_rate=0.3),
+        "XGB",
+    ),
+]
+
+make_regressor_comparison(
+    "Tree-Based Regressors",
+    tree_regressors,
+    n_samples=300,
+    polynomial_exp=3,
+    bias=20,
+    scale=0.25,
+    target_scale=1,
+    feature_scale=10,
+)
+
+
+
+# Code from: ./FullyConnectedNeuralNetwork.ipynb
+--------------------------------------------------------------------------------
+
+import time
+
+import numpy as np
+from matplotlib import pyplot as plt
+from sklearn.datasets import load_iris
+from sklearn.decomposition import PCA
+from sklearn.metrics import accuracy_score
+from sklearn.model_selection import train_test_split
+from torch import nn
+from tqdm import tqdm
+
+from concrete.ml.sklearn import NeuralNetClassifier
+
+# Get iris data-set
+
+X, y = load_iris(return_X_y=True)
+
+# Split into train and test
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
+
+# Scikit-Learn and Concrete ML neural networks only handle float32 input values
+X_train, X_test = X_train.astype("float32"), X_test.astype("float32")
+
+params = {
+    "module__n_layers": 3,
+    "module__activation_function": nn.ReLU,
+    "max_epochs": 1000,
+    "verbose": 0,
+}
+model = NeuralNetClassifier(**params)
+
+model, sklearn_model = model.fit_benchmark(X=X_train, y=y_train)
+
+# Evaluate the sklearn model, which requires float32 inputs
+y_pred_sklearn = sklearn_model.predict(X_test)
+
+sklearn_accuracy = accuracy_score(y_test, y_pred_sklearn) * 100
+print(f"The test accuracy of the trained scikit-learn model is {sklearn_accuracy:.2f}%")
+
+# Evaluate the Concrete ML model in the clear
+y_pred_simulated = model.predict(X_test)
+
+simulated_accuracy = accuracy_score(y_test, y_pred_simulated) * 100
+print(f"The test accuracy of the trained Concrete ML simulated model is {simulated_accuracy:.2f}%")
+
+# Compile the model to get the FHE circuit
+fhe_circuit = model.compile(X_train)
+
+print("Generating a key for a " f"{fhe_circuit.graph.maximum_integer_bit_width()}-bit circuit")
+
+time_begin = time.time()
+fhe_circuit.client.keygen(force=True)
+print(f"Key generation time: {time.time() - time_begin:.2f} seconds")
+
+fhe_predictions = []
+time_begin = time.time()
+for x in tqdm(X_test):
+    y_ = model.predict(np.array([x]), fhe="execute")[0]
+    fhe_predictions.append(y_)
+
+print(f"Execution time: {(time.time() - time_begin) / len(X_test):.2f} seconds per sample")
+
+fhe_accuracy = accuracy_score(y_test, fhe_predictions) * 100
+
+print(f"Test accuracy using the sklearn model: {sklearn_accuracy:.2f}%")
+print(f"Test accuracy using the Concrete ML simulated model: {simulated_accuracy:.2f}%")
+print(f"Test accuracy using the Concrete ML FHE model: {fhe_accuracy:.2f}%")
+
+# Create a 2D grid in order to visualize predictions and contours for both models
+pca = PCA(n_components=2, random_state=np.random.randint(0, 2**15))
+X_test_2d = pca.fit_transform(X_test)
+
+b_min = np.min(X_test_2d, axis=0)
+b_max = np.max(X_test_2d, axis=0)
+
+grid_dims = tuple(
+    np.linspace(b_min[i], b_max[i], 512, dtype=X_test.dtype) for i in range(X_test_2d.shape[1])
+)
+ndgrid_tuple = np.meshgrid(*grid_dims)
+grid_2d = np.vstack([g.ravel() for g in ndgrid_tuple]).transpose()
+
+grid_test = pca.inverse_transform(grid_2d)
+
+# Evaluate the predicted classes using the sklearn model
+grid_pred_sklearn = sklearn_model.predict_proba(grid_test)
+pred_sklearn_classes = np.argmax(grid_pred_sklearn, axis=1)
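+
+# Shape sketch: the 512x512 grid lives in the 2D PCA plane and was mapped back to the
+# original 4-dimensional iris feature space with `inverse_transform` for the predictions
+# above and below.
+print(grid_2d.shape, grid_test.shape)  # expected: (262144, 2) (262144, 4)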
This may be solved by removing +# Skorch and Sklearn inheritance +# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3373 +grid_pred_fhe = model.predict_proba(grid_test)  # pylint: disable=no-member +pred_fhe_classes = np.argmax(grid_pred_fhe, axis=1) + +%matplotlib inline + +cmap = "autumn" + +classes_to_plot = [ +    (pred_sklearn_classes, "Clear Inference (Sklearn)", sklearn_accuracy), +    (pred_fhe_classes, "FHE Inference (Concrete ML)", simulated_accuracy), +] + +fig, axes = plt.subplots(1, 2, figsize=(16, 6)) + +for i, (classes, title, accuracy) in enumerate(classes_to_plot): +    ax = axes[i] + +    # Plot contours based on the predicted classes +    ax.contourf( +        ndgrid_tuple[0], +        ndgrid_tuple[1], +        classes.reshape(ndgrid_tuple[0].shape), +        cmap=cmap, +    ) + +    # Set the title and legend text +    ax.set_title(title) +    ax.text(1.6, 1, f"accuracy: {accuracy:.2f}", size=12) + +    # Plot the test data as a scatter with marker borders +    ax.scatter(X_test_2d[:, 0], X_test_2d[:, 1], c=y_test, s=50, edgecolors="k", cmap=cmap) + +fig.suptitle("Decision boundaries", size=15) +plt.show() + + + +# Code from: ./LinearRegression.ipynb +-------------------------------------------------------------------------------- + +import time + +import numpy as np +from sklearn.datasets import make_regression +from sklearn.linear_model import LinearRegression as SklearnLinearRegression +from sklearn.metrics import r2_score +from sklearn.model_selection import train_test_split + +from concrete.ml.sklearn import LinearRegression as ConcreteLinearRegression + +%matplotlib inline + +import matplotlib.pyplot as plt +from IPython.display import display + +train_plot_config = {"c": "black", "marker": "D", "s": 15, "label": "Train data"} +test_plot_config = {"c": "red", "marker": "x", "s": 15, "label": "Test data"} + + +def get_sklearn_plot_config(r2_score=None): +    label = "Scikit-Learn" +    if r2_score is not None: +        label += f", {'$R^2$'}={r2_score:.4f}" +    return {"c": "blue", "linewidth": 2.5, "label": label} + + +def get_concrete_plot_config(r2_score=None): +    label = "Concrete ML" +    if r2_score is not None: +        label += f", {'$R^2$'}={r2_score:.4f}" +    return {"c": "orange", "linewidth": 2.5, "label": label} + +# pylint: disable=unbalanced-tuple-unpacking +X, y = make_regression( +    n_samples=200, n_features=1, n_targets=1, bias=5.0, noise=30.0, random_state=42 +) +# pylint: enable=unbalanced-tuple-unpacking + +# We split the data-set into a training and a testing set +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42) + +# We sort the test set for a better visualization +sorted_indexes = np.argsort(np.squeeze(X_test)) +X_test = X_test[sorted_indexes, :] +y_test = y_test[sorted_indexes] + +plt.ioff() + +plt.clf() +fig, ax = plt.subplots(1, figsize=(10, 5)) +fig.patch.set_facecolor("white") +ax.scatter(X_train, y_train, **train_plot_config) +ax.scatter(X_test, y_test, **test_plot_config) +ax.legend() +display(fig) + +sklearn_lr = SklearnLinearRegression() +sklearn_lr.fit(X_train, y_train) +y_pred = sklearn_lr.predict(X_test) + +# Compute the R2 scores +sklearn_r2_score = r2_score(y_test, y_pred) + +plt.ioff() +plt.clf() + +fig, ax = plt.subplots(1, figsize=(10, 5)) +fig.patch.set_facecolor("white") +ax.scatter(X_train, y_train, **train_plot_config) +ax.scatter(X_test, y_test, **test_plot_config) +ax.plot(X_test, y_pred, **get_sklearn_plot_config(sklearn_r2_score)) +ax.legend() +display(fig) + +# We quantize the inputs using 8 bits +concrete_lr = 
ConcreteLinearRegression(n_bits=8) + +# We train the concrete linear regression model on clear data +concrete_lr.fit(X_train, y_train) + +# We densify the space representation of the original X, +# to better visualize the resulting step function in the following figure +x_space = np.linspace(X_test.min(), X_test.max(), num=300) +x_space = x_space[:, np.newaxis] +y_pred_q_space = concrete_lr.predict(x_space) + +# Now, we can test our Concrete ML model on the clear test data +y_pred_q = concrete_lr.predict(X_test) + +# Compute the R2 scores +quantized_r2_score = r2_score(y_test, y_pred_q) + +plt.ioff() + +plt.clf() +fig, ax = plt.subplots(1, figsize=(12, 8)) +fig.patch.set_facecolor("white") +ax.scatter(X_train, y_train, **train_plot_config) +ax.scatter(X_test, y_test, **test_plot_config) +ax.plot(X_test, y_pred, **get_sklearn_plot_config(sklearn_r2_score)) +ax.plot(x_space, y_pred_q_space, **get_concrete_plot_config(quantized_r2_score)) +ax.legend() +display(fig) + +fhe_circuit = concrete_lr.compile(X_train) + +print(f"Generating a key for a {fhe_circuit.graph.maximum_integer_bit_width()}-bit circuit") + +time_begin = time.time() +fhe_circuit.client.keygen(force=False) +print(f"Key generation time: {time.time() - time_begin:.4f} seconds") + +time_begin = time.time() +y_pred_fhe = concrete_lr.predict(X_test, fhe="execute") +print(f"Execution time: {(time.time() - time_begin) / len(X_test):.4f} seconds per sample") + +# Measure the FHE R2 score +fhe_r2_score = r2_score(y_test, y_pred_fhe) + +print("R^2 scores:") +print(f"scikit-learn (clear): {sklearn_r2_score:.4f}") +print(f"Concrete ML (quantized): {quantized_r2_score:.4f}") +print(f"Concrete ML (FHE): {fhe_r2_score:.4f}") + +# Measure the error of the FHE quantized model with respect to the clear scikit-learn float model +concrete_score_difference = abs(fhe_r2_score - quantized_r2_score) * 100 / quantized_r2_score +print( + "\nRelative score difference for Concrete ML (quantized clear) vs. Concrete ML (FHE):", + f"{concrete_score_difference:.2f}%", +) + +# Measure the error of the FHE quantized model with respect to the clear float model +score_difference = abs(fhe_r2_score - sklearn_r2_score) * 100 / sklearn_r2_score +print( + "Relative score difference for scikit-learn (clear) vs. Concrete ML (FHE) scores:", + f"{score_difference:.2f}%", +) + +# For better visualization +y_pred_q_space = concrete_lr.predict(x_space) + +plt.clf() +fig, ax = plt.subplots(1, figsize=(12, 8)) +fig.patch.set_facecolor("white") +ax.scatter(X_train, y_train, **train_plot_config) +ax.scatter(X_test, y_test, **test_plot_config) +ax.plot(X_test, y_pred, **get_sklearn_plot_config(sklearn_r2_score)) +ax.plot(x_space, y_pred_q_space, **get_concrete_plot_config(fhe_r2_score)) +ax.legend() + +display(fig) + + + +# Code from: ./ConvolutionalNeuralNetwork.ipynb +-------------------------------------------------------------------------------- + +import time + +import numpy as np +import torch +import torch.utils +from concrete.compiler import check_gpu_available +from sklearn.datasets import load_digits +from sklearn.model_selection import train_test_split +from torch import nn +from torch.utils.data import DataLoader, TensorDataset +from tqdm import tqdm + +from concrete.ml.torch.compile import compile_torch_model + +# And some helpers for visualization. 
+ +%matplotlib inline + +import matplotlib.pyplot as plt + +X, y = load_digits(return_X_y=True) + +# The sklearn Digits data-set, though it contains digit images, keeps these images in vectors +# so we need to reshape them to 2D first. The images are 8x8 px in size and monochrome +X = np.expand_dims(X.reshape((-1, 8, 8)), 1) + +nplot = 4 +fig, ax = plt.subplots(nplot, nplot, figsize=(6, 6)) +for i in range(0, nplot): + for j in range(0, nplot): + ax[i, j].imshow(X[i * nplot + j, ::].squeeze()) +plt.show() + +x_train, x_test, y_train, y_test = train_test_split( + X, y, test_size=0.25, shuffle=True, random_state=42 +) + +class TinyCNN(nn.Module): + """A very small CNN to classify the sklearn digits data-set.""" + + def __init__(self, n_classes) -> None: + """Construct the CNN with a configurable number of classes.""" + super().__init__() + + # This network has a total complexity of 1216 MAC + self.conv1 = nn.Conv2d(1, 8, 3, stride=1, padding=0) + self.conv2 = nn.Conv2d(8, 16, 3, stride=2, padding=0) + self.conv3 = nn.Conv2d(16, 32, 2, stride=1, padding=0) + self.fc1 = nn.Linear(32, n_classes) + + def forward(self, x): + """Run inference on the tiny CNN, apply the decision layer on the reshaped conv output.""" + x = self.conv1(x) + x = torch.relu(x) + x = self.conv2(x) + x = torch.relu(x) + x = self.conv3(x) + x = torch.relu(x) + x = x.flatten(1) + x = self.fc1(x) + return x + +torch.manual_seed(42) + + +def train_one_epoch(net, optimizer, train_loader): + # Cross Entropy loss for classification when not using a softmax layer in the network + loss = nn.CrossEntropyLoss() + + net.train() + avg_loss = 0 + for data, target in train_loader: + optimizer.zero_grad() + output = net(data) + loss_net = loss(output, target.long()) + loss_net.backward() + optimizer.step() + avg_loss += loss_net.item() + + return avg_loss / len(train_loader) + + +# Create the tiny CNN with 10 output classes +N_EPOCHS = 150 + +# Create a train data loader +train_dataset = TensorDataset(torch.Tensor(x_train), torch.Tensor(y_train)) +train_dataloader = DataLoader(train_dataset, batch_size=64) + +# Create a test data loader to supply batches for network evaluation (test) +test_dataset = TensorDataset(torch.Tensor(x_test), torch.Tensor(y_test)) +test_dataloader = DataLoader(test_dataset) + +# Train the network with Adam, output the test set accuracy every epoch +net = TinyCNN(10) +losses_bits = [] +optimizer = torch.optim.Adam(net.parameters()) +for _ in tqdm(range(N_EPOCHS), desc="Training"): + losses_bits.append(train_one_epoch(net, optimizer, train_dataloader)) + +fig = plt.figure(figsize=(8, 4)) +plt.plot(losses_bits) +plt.ylabel("Cross Entropy Loss") +plt.xlabel("Epoch") +plt.title("Training set loss during training") +plt.grid(True) +plt.show() + +def test_torch(net, test_loader): + """Test the network: measure accuracy on the test set.""" + + # Freeze normalization layers + net.eval() + + all_y_pred = np.zeros((len(test_loader)), dtype=np.int64) + all_targets = np.zeros((len(test_loader)), dtype=np.int64) + + # Iterate over the batches + idx = 0 + for data, target in test_loader: + # Accumulate the ground truth labels + endidx = idx + target.shape[0] + all_targets[idx:endidx] = target.numpy() + + # Run forward and get the predicted class id + output = net(data).argmax(1).detach().numpy() + all_y_pred[idx:endidx] = output + + idx += target.shape[0] + + # Print out the accuracy as a percentage + n_correct = np.sum(all_targets == all_y_pred) + print( + f"Test accuracy for fp32 weights and activations: " + f"{n_correct / 
len(test_loader) * 100:.2f}%" +    ) + + +test_torch(net, test_dataloader) + +def test_with_concrete(quantized_module, test_loader, use_sim): +    """Test a neural network that is quantized and compiled with Concrete ML.""" + +    # Pre-allocate int64 arrays for the predictions and the ground truth labels +    all_y_pred = np.zeros((len(test_loader)), dtype=np.int64) +    all_targets = np.zeros((len(test_loader)), dtype=np.int64) + +    # Iterate over the test batches and accumulate predictions and ground truth labels in a vector +    idx = 0 +    for data, target in tqdm(test_loader): +        data = data.numpy() +        target = target.numpy() + +        fhe_mode = "simulate" if use_sim else "execute" + +        # Quantize the inputs and cast to appropriate data type +        y_pred = quantized_module.forward(data, fhe=fhe_mode) + +        endidx = idx + target.shape[0] + +        # Accumulate the ground truth labels +        all_targets[idx:endidx] = target + +        # Get the predicted class id and accumulate the predictions +        y_pred = np.argmax(y_pred, axis=1) +        all_y_pred[idx:endidx] = y_pred + +        # Update the index +        idx += target.shape[0] + +    # Compute and report results +    n_correct = np.sum(all_targets == all_y_pred) + +    return n_correct / len(test_loader) + +n_bits = 6 + +use_gpu_if_available = False +device = "cuda" if use_gpu_if_available and check_gpu_available() else "cpu" + +q_module = compile_torch_model(net, x_train, rounding_threshold_bits=6, p_error=0.1, device=device) + +start_time = time.time() +accs = test_with_concrete( +    q_module, +    test_dataloader, +    use_sim=True, +) +sim_time = time.time() - start_time + +print(f"Simulated FHE execution for {n_bits} bit network accuracy: {100 * accs:.2f}%") + +# Generate keys first +t = time.time() +q_module.fhe_circuit.keygen() +print(f"Keygen time: {time.time()-t:.2f}s") + +# Run inference in FHE on the first 100 encrypted test examples +mini_test_dataset = TensorDataset(torch.Tensor(x_test[:100, :]), torch.Tensor(y_test[:100])) +mini_test_dataloader = DataLoader(mini_test_dataset) + +t = time.time() +accuracy_test = test_with_concrete( +    q_module, +    mini_test_dataloader, +    use_sim=False, +) +elapsed_time = time.time() - t +time_per_inference = elapsed_time / len(mini_test_dataset) +accuracy_percentage = 100 * accuracy_test + +print( +    f"Time per inference in FHE: {time_per_inference:.2f} seconds " +    f"with {accuracy_percentage:.2f}% accuracy" +) + diff --git a/docs/conventions.md b/docs/conventions.md index b230b8813..72b8fc801 100644 --- a/docs/conventions.md +++ b/docs/conventions.md @@ -23,7 +23,7 @@ Let's use following conventions for the docs. If a new convention needs to be de 1. google is a verb ("you can google" but not "you can Google") : but try to avoid this 1. Programs: - Jupyter -   - Concrete ML (no Concrete-ML) +   - Concrete ML (never "Concrete-ML") - pytest except when title where it is capitalized - Python - torch (for the code) and PyTorch (for the product) diff --git a/docs/deep-learning/lora_training.md b/docs/deep-learning/lora_training.md index 7166cdea2..4c4ec053f 100644 --- a/docs/deep-learning/lora_training.md +++ b/docs/deep-learning/lora_training.md @@ -1,39 +1,38 @@ # Encrypted fine-tuning -This document explains how to fine-tune neural-network models and large language-models(LLMs) on private data. +This document explains how to fine-tune neural network models and large language models (LLMs) on private data. Small models can be fine-tuned using a single-client/single-server setup. 
For larger models (such as GPT-2 and above), consider using distributed computation across multiple worker nodes to perform training on encrypted data for optimal latency. ## Overview {% hint style="info" %} -Refer to [this notebook](../advanced_examples/LoraMLP.ipynb) to see the tutorial about applying FHE LORA fine-tuning to a small neural network. +Refer to [this notebook](../advanced_examples/LoraMLP.ipynb) to see the tutorial about applying FHE LoRA fine-tuning to a small neural network. {% endhint %} -Concrete ML supports LORA, a parameter efficient fine-tuning (PEFT) approach, in the [hybrid model](../guides/hybrid-models.md) paradigm. LORA adds adapters, which contain a low number of fine-tunable weights, to the linear layers in an original model. +Concrete ML supports LoRA, a parameter-efficient fine-tuning (PEFT) approach, in the [hybrid model](../guides/hybrid-models.md) paradigm. LoRA adds adapter layers, which contain a small number of trainable parameters, to the linear layers of a base model. -In this setup, Concrete ML outsources the forward and backward passes of the model's original logic to one or more remote servers. Meanwhile, the forward and backward passes over the LORA weights, the loss computation and the weight updates are performed by the client side. As the number of LORA weights is low, this does not significantly increase the computational load for the model training client machine. For large LLMs, over 99% of the model's weights can be outsourced. +In this setup, Concrete ML outsources the computationally intensive parts of forward and backward passes for large models to one or more remote servers. The training client machine only handles the LoRA-adapter forward/backward passes, loss computation, and adapter weight updates. Since the LoRA adapters are small, this additional computation on the client side is minimal. For large LLMs, over 99% of the model's weights can remain outsourced. -The main benefit of hybrid-model LORA training is outsourcing the computation of linear layers, which are typically large in LLMs. These layers require substantial hardware for inference and gradient computation. By securely outsourcing this work, Concrete ML removes the memory bottleneck that previously limited such operations. +The main benefit of hybrid-model LoRA training is outsourcing the computation of linear layers, which are typically large in LLMs. These layers require substantial hardware for inference and gradient computation. By securely outsourcing this work, Concrete ML removes the memory bottleneck that previously limited such operations. ## Usage -Concrete ML integrates with the [`peft` package](https://huggingface.co/docs/peft/index), -which adds LORA layer adapters to a model's linear layers. Here are the steps to convert -a model to hybrid FHE LORA training. +Concrete ML integrates with the [`peft` package](https://huggingface.co/docs/peft/index) to add LoRA adapters to a model's linear layers. Below are the steps to convert a model into a hybrid FHE LoRA training setup. -### 1. Apply the `peft` LORA layers +### 1. Apply the `peft` LoRA layers -The `LoraConfig` class from the `peft` package contains the various LORA parameters. You can specify which layers have LORA adapters through the `target_modules` argument. +The `LoraConfig` class from the `peft` package contains the various LoRA parameters. You can specify which layers have LoRA adapters through the `target_modules` argument. 
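To build intuition for what these adapters compute, here is a minimal, self-contained sketch of the LoRA reparameterization. It is illustrative only: the dimensions and names are hypothetical, and it uses neither the `peft` nor the Concrete ML APIs. Each adapted layer returns the frozen base output plus a low-rank update scaled by `lora_alpha / r`:

```python
import torch
from torch import nn

# Illustrative dimensions; r is the LoRA rank
in_features, out_features, r, lora_alpha = 16, 16, 1, 1

base = nn.Linear(in_features, out_features)
for param in base.parameters():
    param.requires_grad_(False)  # The base layer stays frozen

# LoRA factors: A is initialized randomly, B starts at zero, so
# training begins from the unmodified base model
lora_A = nn.Parameter(torch.randn(r, in_features))
lora_B = nn.Parameter(torch.zeros(out_features, r))


def lora_forward(x):
    """Frozen base output plus the scaled low-rank update."""
    return base(x) + (lora_alpha / r) * (x @ lora_A.t() @ lora_B.t())


output = lora_forward(torch.randn(4, in_features))  # shape: (4, 16)
```

Only the two small factors are trainable, which is why keeping them on the client side adds little compute, as described above.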
For a detailed reference of the various configuration options, refer to the [`LoraConfig`](https://huggingface.co/docs/peft/package_reference/lora#peft.LoraConfig) documentation. ```python import torch +import torch.nn.functional as F from torch import nn, optim from peft import LoraConfig, get_peft_model -from concrete.ml.torch.lora import LoraTraining, get_remote_names +from concrete.ml.torch.lora import LoraTrainer from concrete.ml.torch.hybrid_model import HybridFHEModel from sklearn.datasets import make_circles from torch.utils.data import DataLoader, TensorDataset @@ -54,115 +53,110 @@ class SimpleMLP(nn.Module): out = self.fc2(out) return out +# Create an initial model +model = SimpleMLP() + +# Apply LoRA configuration lora_config = LoraConfig( - r=1, lora_alpha=1, lora_dropout=0.01, target_modules=["fc1", "fc2"], bias="none" + r=1, + lora_alpha=1, + lora_dropout=0.01, + target_modules=["fc1", "fc2"], + bias="none" ) -model = SimpleMLP() -# The initial training loop of the model should be -# added at this point on an initial data-set +peft_model = get_peft_model(model, lora_config) -# A second data-set, task2 is generated +# Generate a second data-set for demonstration purposes X_task2, y_task2 = make_circles(n_samples=32, noise=0.2, factor=0.5) train_loader_task2 = DataLoader( TensorDataset(torch.Tensor(X_task2), torch.LongTensor(y_task2)), batch_size=32, shuffle=True ) - -# Apply LoRA to the model -peft_model = get_peft_model(model, lora_config) ``` -### 2. Convert the LORA model to use custom Concrete ML layers +### 2. Convert the LoRA model to use custom Concrete ML layers -Concrete ML requires converting the `peft` model to add -FHE compatible layers. In this step, you can configure several fine-tuning -parameters: +Next, we need to integrate the LoRA-adapted `peft_model` into the Concrete ML hybrid FHE training framework. This is done using the `LoraTrainer` class, which handles the logic of encrypting outsourced computations, running the forward and backward passes, and updating the LoRA adapter weights. -- The number of gradient accumulation steps: LORA commonly accumulate gradients over several gradient descent steps before updating weights. -- The optimizer parameters -- The loss function +You can configure: + +- The loss function. +- The optimizer and its parameters. +- Gradient accumulation steps (if needed). ```python -lora_training = LoraTraining(peft_model) - - -# Update training parameters, including loss function -lora_training.update_training_parameters( - optimizer=optim.Adam(filter(lambda p: p.requires_grad, peft_model.parameters()), lr=0.01), - loss_fn=nn.CrossEntropyLoss(), - training_args={"gradient_accumulation_steps": 1}, +# Define a simple loss function +def simple_loss(outputs, targets): + return F.cross_entropy(outputs, targets) + +# Create an Adam optimizer +optimizer = optim.Adam(peft_model.parameters(), lr=1e-3) + +# Initialize trainer with the loss and optimizer +lora_trainer = LoraTrainer( + peft_model, + optimizer=optimizer, + loss_fn=simple_loss, ) - ``` -### 3. Compile a hybrid FHE model for the LORA adapted PyTorch model +### 3. Compile a hybrid FHE model for the LoRA adapted PyTorch model -Compile the hybrid FHE model to convert the selected outsourced layers to use FHE, while the rest will run on the client side. Note that the exchange of encrypted activations and gradients may require significant bandwidth. +Before training in FHE, we need to compile the model. 
Compilation calibrates and converts the outsourced linear layers to their FHE equivalents. The `compile` method uses representative data for this step. ```python -# Find layers that can be outsourced -remote_names = get_remote_names(lora_training) - -# Build the hybrid FHE model -hybrid_model = HybridFHEModel(lora_training, module_names=remote_names) - # Build a representative data-set for compilation inputset = (     torch.Tensor(X_task2[:16]),     torch.LongTensor(y_task2[:16]), ) -# Calibrate and compile the model -hybrid_model.model.toggle_calibrate(enable=True) -hybrid_model.compile_model(inputset, n_bits=8) -hybrid_model.model.toggle_calibrate(enable=False) +# Calibrate and compile the model with 8-bit quantization +lora_trainer.compile(inputset, n_bits=8) ``` +At this point, the trainer has a hybrid FHE model ready for encrypted execution of the outsourced layers. The LoRA layers remain on the client side in the clear. + ### 4. Train the model on private data -Finally, the hybrid model can be trained, similar to training a PyTorch model. The client handles training data batches generation and iteration. +You can now train the hybrid FHE model on your private data. The `train` method runs the forward and backward passes, updating only the LoRA adapter weights locally while securely outsourcing the main layers' computations. ```python -# Assume train_loader is a torch.DataLoader - -hybrid_model.model.inference_model.train() -hybrid_model.model.toggle_run_optimizer(enable=True) - -for x_batch, y_batch in train_loader_task2: -    loss, _ = hybrid_model((x_batch, y_batch), fhe="execute") +# Train in FHE mode +lora_trainer.train(train_loader_task2, fhe="execute") ``` ## Additional options ### Inference -Once fine-tuned, the LORA hybrid FHE model can perform inference only, through the -`model.inference_model` attribute of the hybrid FHE model. +Once fine-tuned, the LoRA hybrid FHE model can be used for inference directly +through the `peft_model` object. ```python -hybrid_model.model.inference_model(x) +peft_model(x) ``` -### Toggle LORA layers +### Toggle LoRA layers -To compare to the original model, you can disable the LORA weights to use the original model for inference. +To compare to the original model, you can disable the LoRA weights to use the original model for inference. ```python -hybrid_model.model.inference_model.disable_adapter_layers() -hybrid_model.model.inference_model(x) +peft_model.disable_adapter_layers() +peft_model(x) -# Re-enable the LORA weights -hybrid_model.model.inference_model.enable_adapter_layers() +# Re-enable the LoRA weights +peft_model.enable_adapter_layers() ``` diff --git a/docs/references/api/concrete.ml.common.serialization.encoder.md b/docs/references/api/concrete.ml.common.serialization.encoder.md index 031e6f15d..763fd11fe 100644 --- a/docs/references/api/concrete.ml.common.serialization.encoder.md +++ b/docs/references/api/concrete.ml.common.serialization.encoder.md @@ -45,7 +45,7 @@ Non-native types are serialized manually and dumped in a custom dict format that The name should be unique for each type, as it is used in the ConcreteDecoder class to detect the initial type and apply the proper load method to the serialized object. The serialized value is the value that was serialized manually in a native type. Additional arguments such as a numpy array's dtype are also properly serialized. If an object has an unexpected type or is not serializable, an error is thrown. 
-The ConcreteEncoder is only meant to encode Concrete-ML's built-in models and therefore only supports the necessary types. For example, torch.Tensor objects are not serializable using this encoder as built-in models only use numpy arrays. However, the list of supported types might expand in future releases if new models are added and need new types. +The ConcreteEncoder is only meant to encode Concrete ML's built-in models and therefore only supports the necessary types. For example, torch.Tensor objects are not serializable using this encoder as built-in models only use numpy arrays. However, the list of supported types might expand in future releases if new models are added and need new types. ______________________________________________________________________ diff --git a/script/doc_utils/check_forbidden_words.py b/script/doc_utils/check_forbidden_words.py index 11e7cc5b6..a9a9c16a7 100644 --- a/script/doc_utils/check_forbidden_words.py +++ b/script/doc_utils/check_forbidden_words.py @@ -190,6 +190,8 @@ def process_file(file_str: str, do_open_problematic_files=False): ("eg", [], []), # use e.g., ("eg,", [], []), # use e.g., ("eg., ", [], []), # use e.g., + ("Lora", [], []), # use LoRA + ("LORA", [], []), # use LoRA ] # For later # "We" or "Our", or more generally, passive form diff --git a/src/concrete/ml/common/serialization/encoder.py b/src/concrete/ml/common/serialization/encoder.py index d18b14227..1e6dcbaf4 100644 --- a/src/concrete/ml/common/serialization/encoder.py +++ b/src/concrete/ml/common/serialization/encoder.py @@ -68,7 +68,7 @@ class ConcreteEncoder(JSONEncoder): as a numpy array's dtype are also properly serialized. If an object has an unexpected type or is not serializable, an error is thrown. - The ConcreteEncoder is only meant to encode Concrete-ML's built-in models and therefore only + The ConcreteEncoder is only meant to encode Concrete ML's built-in models and therefore only supports the necessary types. For example, torch.Tensor objects are not serializable using this encoder as built-in models only use numpy arrays. However, the list of supported types might expand in future releases if new models are added and need new types. 
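Since `ConcreteEncoder` subclasses the standard `JSONEncoder`, it plugs into Python's built-in `json` machinery. A minimal usage sketch, assuming only what the docstring above states (numpy arrays are a supported type; the import path mirrors the file location in this patch):

```python
import json

import numpy

from concrete.ml.common.serialization.encoder import ConcreteEncoder

# Serialize a numpy array through the standard json API; torch.Tensor
# objects are deliberately unsupported, as built-in models only use
# numpy arrays
serialized = json.dumps(numpy.arange(4), cls=ConcreteEncoder)
```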
diff --git a/src/concrete/ml/sklearn/glm.py b/src/concrete/ml/sklearn/glm.py index ecdbcf56a..4c7d0b6c8 100644 --- a/src/concrete/ml/sklearn/glm.py +++ b/src/concrete/ml/sklearn/glm.py @@ -83,7 +83,7 @@ def dump_dict(self) -> Dict: metadata: Dict[str, Any] = {} - # Concrete-ML + # Concrete ML metadata["n_bits"] = self.n_bits metadata["sklearn_model"] = self.sklearn_model metadata["_is_fitted"] = self._is_fitted @@ -113,7 +113,7 @@ def load_dict(cls, metadata: Dict): # Instantiate the model obj = cls(n_bits=metadata["n_bits"]) - # Concrete-ML + # Concrete ML obj.n_bits = metadata["n_bits"] obj.sklearn_model = metadata["sklearn_model"] obj.onnx_model_ = metadata["onnx_model_"] @@ -327,7 +327,7 @@ def dump_dict(self) -> Dict: metadata: Dict[str, Any] = {} - # Concrete-ML + # Concrete ML metadata["n_bits"] = self.n_bits metadata["sklearn_model"] = self.sklearn_model metadata["_is_fitted"] = self._is_fitted @@ -358,7 +358,7 @@ def load_dict(cls, metadata: Dict): # Instantiate the model obj = cls(n_bits=metadata["n_bits"]) - # Concrete-ML + # Concrete ML obj.sklearn_model = metadata["sklearn_model"] obj.onnx_model_ = metadata["onnx_model_"] obj._is_fitted = metadata["_is_fitted"] diff --git a/src/concrete/ml/sklearn/neighbors.py b/src/concrete/ml/sklearn/neighbors.py index 727529419..9737d624f 100644 --- a/src/concrete/ml/sklearn/neighbors.py +++ b/src/concrete/ml/sklearn/neighbors.py @@ -97,7 +97,7 @@ def load_dict(cls, metadata: Dict): # Instantiate the model obj = cls(n_bits=metadata["n_bits"]) - # Concrete-ML + # Concrete ML obj.sklearn_model = metadata["sklearn_model"] obj._is_fitted = metadata["_is_fitted"] obj._is_compiled = metadata["_is_compiled"] diff --git a/src/concrete/ml/sklearn/qnn.py b/src/concrete/ml/sklearn/qnn.py index 2981a66de..95c4914ad 100644 --- a/src/concrete/ml/sklearn/qnn.py +++ b/src/concrete/ml/sklearn/qnn.py @@ -228,7 +228,7 @@ def dump_dict(self) -> Dict[str, Any]: metadata["optimizer"] = optimizer.getvalue().hex() metadata["criterion"] = criterion.getvalue().hex() - # Concrete-ML + # Concrete ML metadata["_is_fitted"] = self._is_fitted metadata["_is_compiled"] = self._is_compiled metadata["input_quantizers"] = self.input_quantizers @@ -314,7 +314,7 @@ def load_dict(cls, metadata: Dict): module__n_layers=metadata["module__n_layers"], ) - # Concrete-ML + # Concrete ML obj._is_fitted = metadata["_is_fitted"] obj._is_compiled = metadata["_is_compiled"] obj.input_quantizers = metadata["input_quantizers"] @@ -540,7 +540,7 @@ def dump_dict(self) -> Dict[str, Any]: metadata["optimizer"] = optimizer.getvalue().hex() metadata["criterion"] = criterion.getvalue().hex() - # Concrete-ML + # Concrete ML metadata["_is_fitted"] = self._is_fitted metadata["_is_compiled"] = self._is_compiled metadata["input_quantizers"] = self.input_quantizers @@ -628,7 +628,7 @@ def load_dict(cls, metadata: Dict): classes=metadata["classes_"], ) - # Concrete-ML + # Concrete ML obj._is_fitted = metadata["_is_fitted"] obj._is_compiled = metadata["_is_compiled"] obj.input_quantizers = metadata["input_quantizers"] diff --git a/src/concrete/ml/sklearn/rf.py b/src/concrete/ml/sklearn/rf.py index a3181d12a..c0673b36b 100644 --- a/src/concrete/ml/sklearn/rf.py +++ b/src/concrete/ml/sklearn/rf.py @@ -77,7 +77,7 @@ def post_processing(self, y_preds: numpy.ndarray) -> numpy.ndarray: def dump_dict(self) -> Dict[str, Any]: metadata: Dict[str, Any] = {} - # Concrete-ML + # Concrete ML metadata["n_bits"] = self.n_bits metadata["sklearn_model"] = self.sklearn_model metadata["_is_fitted"] = self._is_fitted @@ 
-117,7 +117,7 @@ def load_dict(cls, metadata: Dict): # Instantiate the model obj = RandomForestClassifier(n_bits=metadata["n_bits"]) - # Concrete-ML + # Concrete ML obj.sklearn_model = metadata["sklearn_model"] obj._is_fitted = metadata["_is_fitted"] obj._is_compiled = metadata["_is_compiled"] @@ -219,7 +219,7 @@ def __init__( def dump_dict(self) -> Dict[str, Any]: metadata: Dict[str, Any] = {} - # Concrete-ML + # Concrete ML metadata["n_bits"] = self.n_bits metadata["sklearn_model"] = self.sklearn_model metadata["_is_fitted"] = self._is_fitted @@ -259,7 +259,7 @@ def load_dict(cls, metadata: Dict): # Instantiate the model obj = RandomForestRegressor(n_bits=metadata["n_bits"]) - # Concrete-ML + # Concrete ML obj.sklearn_model = metadata["sklearn_model"] obj._is_fitted = metadata["_is_fitted"] obj._is_compiled = metadata["_is_compiled"] diff --git a/src/concrete/ml/sklearn/svm.py b/src/concrete/ml/sklearn/svm.py index 509500b3f..093d7e141 100644 --- a/src/concrete/ml/sklearn/svm.py +++ b/src/concrete/ml/sklearn/svm.py @@ -61,7 +61,7 @@ def dump_dict(self) -> Dict[str, Any]: metadata: Dict[str, Any] = {} - # Concrete-ML + # Concrete ML metadata["n_bits"] = self.n_bits metadata["sklearn_model"] = self.sklearn_model metadata["_is_fitted"] = self._is_fitted @@ -94,7 +94,7 @@ def load_dict(cls, metadata: Dict): # Instantiate the model obj = LinearSVR() - # Concrete-ML + # Concrete ML obj.n_bits = metadata["n_bits"] obj.sklearn_model = metadata["sklearn_model"] obj._is_fitted = metadata["_is_fitted"] @@ -180,7 +180,7 @@ def dump_dict(self) -> Dict[str, Any]: metadata: Dict[str, Any] = {} - # Concrete-ML + # Concrete ML metadata["n_bits"] = self.n_bits metadata["sklearn_model"] = self.sklearn_model metadata["_is_fitted"] = self._is_fitted @@ -215,7 +215,7 @@ def load_dict(cls, metadata: Dict): # Instantiate the model obj = LinearSVC() - # Concrete-ML + # Concrete ML obj.n_bits = metadata["n_bits"] obj.sklearn_model = metadata["sklearn_model"] obj._is_fitted = metadata["_is_fitted"] diff --git a/src/concrete/ml/sklearn/tree.py b/src/concrete/ml/sklearn/tree.py index fba10ca3f..048bd6046 100644 --- a/src/concrete/ml/sklearn/tree.py +++ b/src/concrete/ml/sklearn/tree.py @@ -77,7 +77,7 @@ def post_processing(self, y_preds: numpy.ndarray) -> numpy.ndarray: def dump_dict(self) -> Dict[str, Any]: metadata: Dict[str, Any] = {} - # Concrete-ML + # Concrete ML metadata["n_bits"] = self.n_bits metadata["sklearn_model"] = self.sklearn_model metadata["_is_fitted"] = self._is_fitted @@ -112,7 +112,7 @@ def load_dict(cls, metadata: Dict): # Instantiate the model obj = cls(n_bits=metadata["n_bits"]) - # Concrete-ML + # Concrete ML obj.sklearn_model = metadata["sklearn_model"] obj._is_fitted = metadata["_is_fitted"] obj._is_compiled = metadata["_is_compiled"] @@ -208,7 +208,7 @@ def __getattr__(self, attr: str): def dump_dict(self) -> Dict[str, Any]: metadata: Dict[str, Any] = {} - # Concrete-ML + # Concrete ML metadata["n_bits"] = self.n_bits metadata["sklearn_model"] = self.sklearn_model metadata["_is_fitted"] = self._is_fitted @@ -242,7 +242,7 @@ def load_dict(cls, metadata: Dict): # Instantiate the model obj = cls(n_bits=metadata["n_bits"]) - # Concrete-ML + # Concrete ML obj.sklearn_model = metadata["sklearn_model"] obj._is_fitted = metadata["_is_fitted"] obj._fhe_ensembling = metadata["_fhe_ensembling"] diff --git a/src/concrete/ml/sklearn/xgb.py b/src/concrete/ml/sklearn/xgb.py index e0687da78..366a3ae58 100644 --- a/src/concrete/ml/sklearn/xgb.py +++ b/src/concrete/ml/sklearn/xgb.py @@ -137,7 +137,7 @@ def 
__init__( def dump_dict(self) -> Dict[str, Any]: metadata: Dict[str, Any] = {} - # Concrete-ML + # Concrete ML metadata["n_bits"] = self.n_bits metadata["sklearn_model"] = self.sklearn_model metadata["_is_fitted"] = self._is_fitted @@ -208,7 +208,7 @@ def load_dict(cls, metadata: Dict): # Instantiate the model obj = XGBClassifier(n_bits=metadata["n_bits"]) - # Concrete-ML + # Concrete ML obj.sklearn_model = metadata["sklearn_model"] obj._is_fitted = metadata["_is_fitted"] obj._is_compiled = metadata["_is_compiled"] @@ -417,7 +417,7 @@ def post_processing(self, y_preds: numpy.ndarray) -> numpy.ndarray: def dump_dict(self) -> Dict[str, Any]: metadata: Dict[str, Any] = {} - # Concrete-ML + # Concrete ML metadata["n_bits"] = self.n_bits metadata["sklearn_model"] = self.sklearn_model metadata["_is_fitted"] = self._is_fitted @@ -487,7 +487,7 @@ def load_dict(cls, metadata: Dict): # Instantiate the model obj = XGBRegressor(n_bits=metadata["n_bits"]) - # Concrete-ML + # Concrete ML obj.sklearn_model = metadata["sklearn_model"] obj._is_fitted = metadata["_is_fitted"] obj._is_compiled = metadata["_is_compiled"] diff --git a/src/concrete/ml/torch/hybrid_backprop_linear.py b/src/concrete/ml/torch/hybrid_backprop_linear.py new file mode 100644 index 000000000..308d6bfe9 --- /dev/null +++ b/src/concrete/ml/torch/hybrid_backprop_linear.py @@ -0,0 +1,116 @@ +"""Linear layer implementations for backprop FHE-compatible models.""" + +from torch import autograd, nn + +# pylint: disable=arguments-differ,abstract-method + + +class ForwardModuleLinear(nn.Module): + """Forward module for linear layers.""" + + def __init__(self, weight, bias=None, weight_transposed=False): + super().__init__() + self.weight = weight + self.bias = bias + self.weight_transposed = weight_transposed # If True, weight is (in_features, out_features) + + def forward(self, input_tensor): + """Forward pass for linear layers. + + Args: + input_tensor: The input tensor. + + Returns: + The output tensor after applying the linear transformation. + """ + if self.weight_transposed: + # Weight is (in_features, out_features) + output = input_tensor @ self.weight + else: + # Weight is (out_features, in_features) + output = input_tensor @ self.weight.t() + if self.bias is not None: + output += self.bias + return output + + +class BackwardModuleLinear(nn.Module): + """Backward module for linear layers.""" + + def __init__(self, weight, weight_transposed=False): + super().__init__() + self.weight = weight + self.weight_transposed = weight_transposed + + def forward(self, grad_output): + """Backward pass for linear layers. + + Args: + grad_output: The gradient output tensor. + + Returns: + The gradient input tensor after applying the backward pass. + """ + if self.weight_transposed: + grad_input = grad_output @ self.weight.t() + else: + grad_input = grad_output @ self.weight + return grad_input + + +class CustomLinear(nn.Module): + """Custom linear module.""" + + def __init__(self, weight, bias=None, weight_transposed=False): + super().__init__() + self.forward_module = ForwardModuleLinear(weight, bias, weight_transposed) + self.backward_module = BackwardModuleLinear(weight, weight_transposed) + + def forward(self, input_tensor): + """Forward pass of the custom linear module. + + Args: + input_tensor: The input tensor. + + Returns: + The output tensor after applying the custom linear module. 
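+
+        Note: the forward computation is delegated to ForwardModuleLinear,
+        while gradients are routed back through BackwardModuleLinear by the
+        ForwardBackwardModule autograd function, so no weight or bias
+        gradients are accumulated for this layer.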
+ """ + return ForwardBackwardModule.apply(input_tensor, self.forward_module, self.backward_module) + + +class ForwardBackwardModule(autograd.Function): + """Custom autograd function for forward and backward passes.""" + + @staticmethod + def forward(ctx, input_tensor, forward_module, backward_module): + """Forward pass of the custom autograd function. + + Args: + ctx: The context object. + input_tensor: The input tensor. + forward_module: The forward module. + backward_module: The backward module. + + Returns: + The output tensor after applying the forward pass. + """ + ctx.backward_module = backward_module + output = forward_module.forward(input_tensor) + return output + + @staticmethod + def backward(ctx, grad_output): + """Backward pass of the custom autograd function. + + Args: + ctx: The context object. + grad_output: The gradient output tensor. + + Returns: + The gradient input tensor after applying the backward pass. + """ + backward_module = ctx.backward_module + grad_input = backward_module.forward(grad_output) + + # grad_weight and grad_bias are not needed when computing the backward for LoRA + return grad_input, None, None diff --git a/src/concrete/ml/torch/hybrid_model.py b/src/concrete/ml/torch/hybrid_model.py index 8cc4e69f2..5aa58e5a0 100644 --- a/src/concrete/ml/torch/hybrid_model.py +++ b/src/concrete/ml/torch/hybrid_model.py @@ -29,7 +29,7 @@ compile_torch_model, has_any_qnn_layers, ) -from .lora import BackwardModuleLinear, ForwardModuleLinear +from .hybrid_backprop_linear import BackwardModuleLinear, ForwardModuleLinear def tuple_to_underscore_str(tup: Tuple) -> str: @@ -389,7 +389,6 @@ def __init__( def _replace_modules(self): """Replace the private modules in the model with remote layers.""" - self._has_only_large_linear_layers = True for module_name in self.module_names: # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3858 @@ -682,7 +681,9 @@ def clear_private_info(module): # Save the model with a specific filename model_path = path / "model.pth" - torch.save(self.model, model_path.resolve()) + # Save the model state dict due to a Brevitas issue + # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4572 + torch.save(self.model.state_dict(), model_path.resolve()) # Save the FHE circuit in the same directory self._save_fhe_circuit(path, via_mlir=via_mlir) diff --git a/src/concrete/ml/torch/lora.py b/src/concrete/ml/torch/lora.py index 5a069737a..d80cdab0a 100644 --- a/src/concrete/ml/torch/lora.py +++ b/src/concrete/ml/torch/lora.py @@ -1,16 +1,22 @@ -"""This module contains classes for LoRA (Low-Rank Adaptation) training and custom layers.""" +"""This module contains classes for LoRA (Low-Rank Adaptation) FHE training and custom layers.""" from typing import List, Tuple, Union import torch +from torch import Tensor, nn +from torch.utils.data import DataLoader +from tqdm import tqdm + +from .hybrid_backprop_linear import CustomLinear +from .hybrid_model import HybridFHEModel try: from transformers import Conv1D as TransformerConv1D -except ImportError: +except ImportError: # pragma: no cover TransformerConv1D = None # Create a tuple of linear layer classes to check against -LINEAR_LAYERS: tuple = (torch.nn.Linear,) +LINEAR_LAYERS: tuple = (nn.Linear,) if TransformerConv1D is not None: LINEAR_LAYERS = LINEAR_LAYERS + (TransformerConv1D,) @@ -19,6 +25,23 @@ # pylint: disable=arguments-differ +def try_dict(obj): + """Try to convert the object to a dict. + + Args: + obj: The object to convert to a dict. 
+ +    Returns: +        The object converted to a dict or None if the conversion fails. +    """ +    if isinstance(obj, dict): +        return obj +    try: +        return dict(obj) +    except (TypeError, ValueError): +        return None + + class LoraTraining(torch.nn.Module):     """LoraTraining module for fine-tuning with LoRA in a hybrid model setting. @@ -31,49 +54,102 @@ class LoraTraining(torch.nn.Module):     toggle between calibration and optimization modes.     Args: -        inference_model (torch.nn.Module): The base model to be fine-tuned. -        n_layers_to_skip (int): Number of layers to skip. Linear layers that do not require -            gradient to be propagated are skipped. Defaults to 1. +        model (torch.nn.Module): The base model with LoRA layers to be fine-tuned. +        n_layers_to_skip_for_backprop (int): Number of initial linear layers to keep as standard +            layers. Since the first layer doesn't need backpropagation (no previous layer to +            update), we typically skip 1 layer. Defaults to 1. +        loss_fn (callable, optional): Loss function to compute the loss. If None, the model +            is expected to return a loss.     """ -    def __init__(self, inference_model, n_layers_to_skip: int = 1) -> None: +    def __init__(self, model, n_layers_to_skip_for_backprop=1, loss_fn=None):         super().__init__() -        self.inference_model = inference_model +        # Check if model accepts labels when no loss_fn is provided +        if loss_fn is None: +            from inspect import signature + +            forward_sig = signature(model.forward) +            if "labels" not in forward_sig.parameters: +                raise ValueError( +                    "When no loss_fn is provided, the model's forward method " +                    "must accept a 'labels' parameter" +                ) -        self.replace_layers_with_custom(self.inference_model, n_layers_to_skip) +        # Assert that the model contains LoRA layers +        self.assert_has_lora_layers(model) -        self.optimizer = None -        self.lr_scheduler = None -        self.loss_fn = None -        self.gradient_accumulation_steps = 1 -        self.max_grad_norm = None +        self.inference_model = model +        self.replace_layers_with_custom(self.inference_model, n_layers_to_skip_for_backprop)         self.calibrate = False -        self.run_optimizer = False +        self.loss_fn = loss_fn +        self.loss_scaling_factor = 1.0 + +    def set_loss_scaling_factor(self, loss_scaling_factor: float): +        """Set a scaling factor for the loss to account for gradient accumulation. + +        This ensures that gradients are correctly averaged over multiple +        mini-batches when performing gradient accumulation, preventing them +        from being scaled up by the number of accumulation steps. + +        Args: +            loss_scaling_factor (float): The number of gradient accumulation steps. +                                         The loss will be divided by this factor +                                         before backpropagation. +        """ +        self.loss_scaling_factor = loss_scaling_factor     @staticmethod -    def replace_layers_with_custom(model: torch.nn.Module, n_layers_to_skip: int): -        """Replace linear layers with custom ones. +    def assert_has_lora_layers(model): +        """Assert that the model contains LoRA layers. + +        Args: +            model (torch.nn.Module): The model to check for LoRA layers. + +        Raises: +            ValueError: If the model does not contain any LoRA layers. +        """ + +        def is_lora_module(module): +            # Check for common LoRA attributes with case-insensitive matching +            lora_attributes = ["lora_a", "lora_b", "lora_dropout"] +            return any( +                hasattr(module, attr) +                or hasattr(module, attr.lower()) +                or hasattr(module, attr.upper()) +                for attr in lora_attributes +            ) -        This method replaces eligible linear layers in the model with custom layers -        that are compatible with the LoRA training procedure. 
+ has_lora = any(is_lora_module(module) for module in model.modules()) + + if not has_lora: + raise ValueError("The model does not contain any detectable LoRA layers.") + + print("LoRA layers detected in the model.") + + @staticmethod + def replace_layers_with_custom(model: nn.Module, n_layers_to_skip_for_backprop: int) -> None: + """Replace linear layers with custom ones. Args: - model (torch.nn.Module): The model to replace layers in. - n_layers_to_skip (int): Number of layers to skip. + model (nn.Module): The model to replace layers in. + n_layers_to_skip_for_backprop (int): Number of initial linear layers to keep as standard + layers. Since the first layer doesn't need backpropagation (no previous layer to + update), we typically skip 1 layer. """ - def _replace(module: torch.nn.Module): - nonlocal n_layers_to_skip + def _replace(module: nn.Module): + nonlocal n_layers_to_skip_for_backprop for name, child in list(module.named_children()): - # Skip modules containing "lora" in their name + + # Skip lora layers as they are computed on the client side if "lora" in name: continue if isinstance(child, LINEAR_LAYERS): - if n_layers_to_skip > 0: - n_layers_to_skip -= 1 + if n_layers_to_skip_for_backprop > 0: + n_layers_to_skip_for_backprop -= 1 # Skip the first eligible layer continue @@ -85,7 +161,9 @@ def _replace(module: torch.nn.Module): # Create the CustomLinear layer custom_layer = CustomLinear( - weight=child.weight, bias=child.bias, weight_transposed=weight_transposed + weight=child.weight, + bias=child.bias, + weight_transposed=weight_transposed, ) # Replace the original layer with the custom layer @@ -96,251 +174,239 @@ def _replace(module: torch.nn.Module): _replace(model) - def update_training_parameters( - self, optimizer=None, lr_scheduler=None, loss_fn=None, training_args=None - ): - """Update training parameters for the LoRA module. + def toggle_calibrate(self, enable: bool = True): + """Toggle calibration mode. Args: - optimizer (optional): The optimizer to use for training. - lr_scheduler (optional): The learning rate scheduler to use for training. - loss_fn (callable, optional): Loss function to compute the loss. - training_args (dict or namespace, optional): Training arguments containing - 'gradient_accumulation_steps' and 'max_grad_norm'. + enable (bool): Whether to enable calibration mode. """ - self.optimizer = optimizer - self.lr_scheduler = lr_scheduler - self.loss_fn = loss_fn - - if training_args is not None: - # Check if training_args is a dict or an object with attributes - if isinstance(training_args, dict): - self.gradient_accumulation_steps = training_args.get( - "gradient_accumulation_steps", 1 - ) - self.max_grad_norm = training_args.get("max_grad_norm", None) - else: - self.gradient_accumulation_steps = getattr( - training_args, "gradient_accumulation_steps", 1 - ) - self.max_grad_norm = getattr(training_args, "max_grad_norm", None) - else: - self.gradient_accumulation_steps = 1 - self.max_grad_norm = None + self.calibrate = enable - def forward( - self, inputs: Tuple[torch.Tensor, ...] - ) -> Tuple[torch.Tensor, Union[torch.Tensor, None]]: + def forward(self, inputs: Tuple[Tensor, ...]) -> Tuple[Tensor, Union[Tensor, None]]: """Forward pass of the LoRA training module. Args: - inputs (tuple): A tuple containing the input tensors. The first two elements should be - the features and the labels. Additional elements will be passed - to the model as needed. + inputs (tuple): A tuple containing the input tensors. 
Returns: -            A tuple containing the loss and gradient norm. +            A tuple containing the original (unscaled) loss and None.         Raises: -            ValueError: If the model does not return a loss when `self.loss_fn` is None. +            ValueError: If the model does not return a loss and no loss function is provided.         """         assert ( -            len(inputs) >= 2 +            len(inputs) >= 2 and len(inputs) <= 3 -        ), "Expected at least two inputs in the tuple: inputs (x) and targets (y)" +        ), "Expected two or three inputs in the tuple: (x, y) or (x, y, attention_mask)" -        # Remove this once hybrid model supports multiple inputs -        # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4568 -        # Extract x (input features) and y (labels) -        x, y = inputs[0], inputs[1] +        # Unpack depending on how many inputs we have +        if len(inputs) == 2: +            input_ids, labels = inputs +            attention_mask = None +        else: +            input_ids, labels, attention_mask = inputs -        # Additional inputs, if any (e.g., attention_mask) -        additional_inputs = inputs[2:] +        # Validate the attention mask, if one was provided +        if attention_mask is not None: +            assert torch.all( +                torch.logical_or(attention_mask == 0, attention_mask == 1) +            ), "Invalid attention mask provided. Attention mask should only contain 0s and 1s." -        # If no loss function is provided, we assume the model can compute the loss internally         if self.loss_fn is None: -            # Forward pass through the inference model with labels -            outputs = self.inference_model(x, labels=y, *additional_inputs) +            # Pass inputs and labels to the model +            if attention_mask is not None: +                outputs = self.inference_model( +                    input_ids, labels=labels, attention_mask=attention_mask +                ) +            else: +                outputs = self.inference_model(input_ids, labels=labels) -            # Use getattr to safely access the loss attribute from the outputs -            loss = getattr(outputs, "loss", None) +            # Check if outputs is a dict and retrieve the loss +            if isinstance(outputs, dict): +                loss = outputs.get("loss", None) +            else: +                loss = getattr(outputs, "loss", None)             if loss is None:                 raise ValueError( -                    "The model did not return a loss. Ensure that 'labels' are correctly provided." 
+ "The model did not return a loss.", + "Ensure that 'labels' are correctly provided or provide a loss_fn.", ) else: - # Forward pass through the inference model without labels - outputs = self.inference_model(x, *additional_inputs) + # Forward pass without labels; compute loss manually + if attention_mask is not None: + logits = self.inference_model(input_ids, attention_mask=attention_mask) + else: + logits = self.inference_model(input_ids) - # If the outputs contain several keys, extract the logits - if isinstance(outputs, dict) and "logits" in outputs: - outputs = outputs["logits"] + # If logits is a dict with 'logits' key, extract it + if isinstance(logits, dict) and "logits" in logits: + logits = logits["logits"] - # Compute the loss using the provided loss function - loss = self.loss_fn(outputs, y) + loss = self.loss_fn(logits, labels) - # Scale the loss based on gradient accumulation - loss = loss / self.gradient_accumulation_steps + # Scale the loss for gradient accumulation + scaled_loss = loss / self.loss_scaling_factor - # Update gradients # We need to set requires grad to the loss manually because the inference model's last # step is the "lm_head" layer, which might be detached from the graph by the hybrid model - loss.requires_grad_(True) - loss.backward() - - grad_norm = None - if not self.calibrate and self.run_optimizer: - if self.max_grad_norm is not None: - grad_norm = torch.nn.utils.clip_grad_norm_( - self.inference_model.parameters(), max_norm=self.max_grad_norm, norm_type=2 - ) - - if self.optimizer is not None: - self.optimizer.step() + scaled_loss.requires_grad_(True) + scaled_loss.backward() - if self.lr_scheduler is not None: - self.lr_scheduler.step() + # Return the original (unscaled) loss for logging + return loss.detach(), None - self.inference_model.zero_grad() - # Clean gradients after calibration - elif self.calibrate: - self.inference_model.zero_grad() - - return loss, grad_norm - - def toggle_calibrate(self, enable: bool = True): - """Toggle calibration mode. +class LoraTrainer: + """Trainer class for LoRA fine-tuning with FHE support. - Args: - enable (bool): Whether to enable calibration mode. - """ - self.calibrate = enable + This class handles the training loop, optimizer, scheduler, + and integrates with the hybrid model. - def toggle_run_optimizer(self, enable: bool = True): - """Toggle optimizer execution. + Args: + model (nn.Module): The base model with LoRA layers to be fine-tuned. + optimizer (torch.optim.Optimizer): Optimizer for training. + loss_fn (callable): Loss function to compute the loss. + lr_scheduler (optional): Learning rate scheduler. + training_args (dict): Training arguments. + n_layers_to_skip_for_backprop (int): Number of initial linear layers to keep as standard + layers. Since the first layer doesn't need backpropagation (no previous layer to + update), we typically skip 1 layer. Defaults to 1. + """ - Args: - enable (bool): Whether to enable optimizer execution. 
- """ - self.run_optimizer = enable + def __init__( + self, + model, + optimizer, + loss_fn=None, + lr_scheduler=None, + training_args=None, + n_layers_to_skip_for_backprop=1, + ): + self.optimizer = optimizer + self.lr_scheduler = lr_scheduler + self.training_args = training_args or {} + self.gradient_accumulation_steps = self.training_args.get("gradient_accumulation_steps", 1) + self.max_grad_norm = self.training_args.get("max_grad_norm", None) + # Create the LoraTraining module + self.lora_training_module = LoraTraining( + model, n_layers_to_skip_for_backprop=n_layers_to_skip_for_backprop, loss_fn=loss_fn + ) -class ForwardModuleLinear(torch.nn.Module): - """Forward module for linear layers.""" + # Determine modules to be executed remotely + self.remote_names = get_remote_names(self.lora_training_module) - def __init__(self, weight, bias=None, weight_transposed=False): - super().__init__() - self.weight = weight - self.bias = bias - self.weight_transposed = weight_transposed # If True, weight is (in_features, out_features) + # Create the hybrid model + self.hybrid_model = HybridFHEModel( + self.lora_training_module, module_names=self.remote_names + ) - def forward(self, input_tensor): - """Forward pass for linear layers. + def compile(self, inputset, n_bits=8): + """Compile the hybrid model with the given input set. Args: - input_tensor: The input tensor. - - Returns: - The output tensor after applying the linear transformation. + inputset (tuple): Input set for compilation. + n_bits (int): Bit width for quantization. """ - if self.weight_transposed: - # Weight is (in_features, out_features) - output = input_tensor @ self.weight - else: - # Weight is (out_features, in_features) - output = input_tensor @ self.weight.t() - if self.bias is not None: - output += self.bias - return output - - -class BackwardModuleLinear(torch.nn.Module): - """Backward module for linear layers.""" + self.lora_training_module.toggle_calibrate(enable=True) + self.hybrid_model.compile_model(inputset, n_bits=n_bits) + self.lora_training_module.toggle_calibrate(enable=False) + + def train( + self, + train_loader: DataLoader, + num_epochs: int = 10, + fhe: str = "simulate", + ): + """Train the model using the hybrid FHE model. - def __init__(self, weight, weight_transposed=False): - super().__init__() - self.weight = weight - self.weight_transposed = weight_transposed + Args: + train_loader (DataLoader): DataLoader for training data. + num_epochs (int): Number of epochs to train. + fhe (str): FHE mode ('disable', 'simulate', 'execute' or 'torch'). + """ + device = torch.device("cpu") + self.lora_training_module.to(device) + self.lora_training_module.inference_model.train() - def forward(self, grad_output): - """Backward pass for linear layers. + # Set the loss scaling factor for gradient accumulation + self.lora_training_module.set_loss_scaling_factor(self.gradient_accumulation_steps) - Args: - grad_output: The gradient output tensor. + epoch_pbar = tqdm(range(1, num_epochs + 1), desc="Training", unit="epoch") - Returns: - The gradient input tensor after applying the backward pass. - """ - if self.weight_transposed: - grad_input = grad_output @ self.weight.t() - else: - grad_input = grad_output @ self.weight - return grad_input + for epoch in epoch_pbar: + total_loss = 0.0 + self.optimizer.zero_grad() # Zero gradients at the start of the epoch + for step, batch in enumerate(train_loader): -class CustomLinear(torch.nn.Module): - """Custom linear module.""" + # Convert the batch to a tuple of inputs on the device. 
+ if batch_dict := try_dict(batch): + batch = batch_dict + # Convert dict to tuple of values and move them to the device + batch = tuple( + v.to(device) if isinstance(v, torch.Tensor) else v for v in batch.values() + ) + elif isinstance(batch, (tuple, list)): + # Move tuple/list elements to the device + batch = tuple( + item.to(device) if isinstance(item, torch.Tensor) else item + for item in batch + ) + else: + # If it is a single non-tensor item, wrap it in a tuple + batch = (batch,) - def __init__(self, weight, bias=None, weight_transposed=False): - super().__init__() - self.forward_module = ForwardModuleLinear(weight, bias, weight_transposed) - self.backward_module = BackwardModuleLinear(weight, weight_transposed) + # Forward pass through the hybrid model + loss, _ = self.hybrid_model(batch, fhe=fhe) - def forward(self, input_tensor): - """Forward pass of the custom linear module. + # Loss scaling and backward is done inside LoraTraining - Args: - input_tensor: The input tensor. + # Accumulate loss for logging + total_loss += loss.item() - Returns: - The output tensor after applying the custom linear module. - """ - return ForwardBackwardModule.apply(input_tensor, self.forward_module, self.backward_module) + # Update weights after gradient accumulation steps + if (step + 1) % self.gradient_accumulation_steps == 0 or (step + 1) == len( + train_loader + ): + if self.max_grad_norm is not None: + torch.nn.utils.clip_grad_norm_( + self.lora_training_module.parameters(), self.max_grad_norm + ) + # Optimizer step + self.optimizer.step() -class ForwardBackwardModule(torch.autograd.Function): - """Custom autograd function for forward and backward passes.""" + # Scheduler step + if self.lr_scheduler is not None: + self.lr_scheduler.step() - @staticmethod - def forward(ctx, input_tensor, forward_module, backward_module): - """Forward pass of the custom autograd function. + # Zero gradients + self.optimizer.zero_grad() - Args: - ctx: The context object. - input_tensor: The input tensor. - forward_module: The forward module. - backward_module: The backward module. + avg_loss = total_loss / len(train_loader) + epoch_pbar.set_postfix( + { + "Epoch": epoch, + "Avg Loss": f"{avg_loss:.4f}", + "FHE Mode": fhe, + } + ) - Returns: - The output tensor after applying the forward pass. - """ - ctx.backward_module = backward_module - output = forward_module.forward(input_tensor) - return output + print(f"Training completed. Final Avg Loss: {avg_loss:.4f}, FHE Mode: {fhe}") - @staticmethod - def backward(ctx, grad_output): - """Backward pass of the custom autograd function. + def save_and_clear_private_info(self, path): + """Save the model and remove private information. Args: - ctx: The context object. - grad_output: The gradient output tensor. - - Returns: - The gradient input tensor after applying the backward pass. + path (str): The path to save the model. """ - backward_module = ctx.backward_module - grad_input = backward_module.forward(grad_output) - - # grad_weight and grad_bias are not needed when computing the backward for LoRA - return grad_input, None, None + self.hybrid_model.save_and_clear_private_info(path) -def get_remote_names(model: torch.nn.Module, include_embedding_layers: bool = False) -> List[str]: +def get_remote_names(model: nn.Module, include_embedding_layers: bool = False) -> List[str]: """Get names of modules to be executed remotely. Args: - model (torch.nn.Module): The model to inspect. + model (nn.Module): The model to inspect. 
include_embedding_layers (bool): Whether to include embedding layers. Returns: @@ -363,7 +429,7 @@ def get_remote_names(model: torch.nn.Module, include_embedding_layers: bool = Fa elif isinstance(module, CustomLinear): remote_names.append(f"{name}.forward_module") remote_names.append(f"{name}.backward_module") - elif include_embedding_layers and (isinstance(module, torch.nn.Embedding) or is_lm_head): + elif include_embedding_layers and (isinstance(module, nn.Embedding) or is_lm_head): remote_names.append(name) return remote_names diff --git a/tests/torch/test_lora.py b/tests/torch/test_lora.py index a3ee1a03e..03a38d929 100644 --- a/tests/torch/test_lora.py +++ b/tests/torch/test_lora.py @@ -1,463 +1,643 @@ -# pylint: disable=redefined-outer-name +"""Tests for the LoRA (Low-Rank Adaptation) functionality in the torch module.""" -"""Tests for the LoraTraining class and related modules in lora.py.""" +# pylint: disable=redefined-outer-name -import sys -from collections import namedtuple -from types import SimpleNamespace -from unittest import mock +from unittest.mock import MagicMock import pytest import torch from torch import nn -from torch.optim import SGD -from torch.optim.lr_scheduler import StepLR -from transformers import Conv1D as TransformerConv1D +from torch.utils.data import DataLoader, Dataset, TensorDataset -from concrete.ml.torch.lora import ( +from concrete.ml.torch.hybrid_backprop_linear import ( BackwardModuleLinear, CustomLinear, - ForwardBackwardModule, ForwardModuleLinear, - LoraTraining, - get_remote_names, ) +from concrete.ml.torch.lora import LoraTrainer, LoraTraining, get_remote_names +# Dummy models and datasets for testing -class DummyConfig: - """A dummy configuration class to mimic model config.""" - def __init__(self, model_type): - self.model_type = model_type +class DummyLoRAModel(nn.Module): + """Dummy LoRA model for testing.""" + def __init__(self): + super().__init__() + # Simulate LoRA layers by including 'lora_a' attribute + self.lora_a = nn.Parameter(torch.randn(10, 10)) + self.linear1 = nn.Linear(10, 20) + self.linear2 = nn.Linear(20, 10) -class DummyBaseModel: - """A dummy base model class to mimic base_model.model.""" - - def __init__(self, model_type): - self.model = DummyModel(model_type) + def forward(self, x, labels=None): + """Forward pass.""" + logits = self.linear2(torch.relu(self.linear1(x))) + if labels is not None: + loss = nn.functional.mse_loss(logits, labels) + return {"loss": loss} + return {"logits": logits} -class DummyModel(torch.nn.Module): - """A dummy model class to mimic the actual model.""" +class DummyLoRAModelNoLoss(nn.Module): + """Dummy LoRA model without loss function for testing.""" - def __init__(self, model_type): + def __init__(self): super().__init__() - self.config = DummyConfig(model_type) + self.lora_a = nn.Parameter(torch.randn(10, 10)) + self.linear1 = nn.Linear(10, 20) + self.linear2 = nn.Linear(20, 10) - @staticmethod - def forward(x): - """Dummy forward method.""" - return x + def forward(self, x): + """Forward pass.""" + logits = self.linear2(torch.relu(self.linear1(x))) + return {"logits": logits} -class DummyInferenceModel(torch.nn.Module): - """A dummy inference model with various layers.""" +class DummyModel(nn.Module): + """Dummy model for testing.""" def __init__(self): super().__init__() - self.base_model = DummyBaseModel("gpt2") - self.linear1 = torch.nn.Linear(2, 2) - self.conv1d = TransformerConv1D(2, 2) - self.linear2 = torch.nn.Linear(2, 2) - self.lora_layer = torch.nn.Linear(2, 2) # Layer with 
'lora' in name - self.lora_layer_name = "lora_layer" + self.linear1 = nn.Linear(10, 20) + self.linear2 = nn.Linear(20, 10) - def forward(self, x, labels=None): - """A simple forward method that returns logits or loss.""" - x = self.linear1(x) - x = self.conv1d(x) - x = self.linear2(x) - x = self.lora_layer(x) - logits = x - if labels is not None: - loss = ((logits - labels) ** 2).mean() - Output = namedtuple("Output", ["loss"]) - return Output(loss=loss) - return {"logits": logits, "something_else": torch.tensor(1.0)} + def forward(self, x): + """Forward pass.""" + logits = self.linear2(torch.relu(self.linear1(x))) + return {"logits": logits} @pytest.fixture -def base_inference_model(): - """Fixture for creating a DummyInferenceModel instance.""" - return DummyInferenceModel() +def dummy_lora_model(): + """Dummy LoRA model for testing.""" + return DummyLoRAModel() @pytest.fixture -def base_lora_training(base_inference_model): - """Fixture for creating a LoraTraining instance.""" - return LoraTraining(base_inference_model) +def dummy_model(): + """Dummy model for testing.""" + return DummyModel() -@pytest.mark.parametrize("n_layers_to_skip", [0, 1, 2]) -def test_lora_training_replace_layers(base_lora_training, n_layers_to_skip): - """Test that LoraTraining replaces layers correctly.""" - original_linear1 = base_lora_training.inference_model.linear1 - original_lora_layer = base_lora_training.inference_model.lora_layer +def test_assert_has_lora_layers_with_lora_layers(dummy_lora_model): + """Test assert_has_lora_layers with LoRA layers.""" + LoraTraining.assert_has_lora_layers(dummy_lora_model) - # Replace layers with custom layers - base_lora_training.replace_layers_with_custom( - base_lora_training.inference_model, n_layers_to_skip=n_layers_to_skip - ) - inference_model = base_lora_training.inference_model +def test_assert_has_lora_layers_without_lora_layers(dummy_model): + """Test assert_has_lora_layers without LoRA layers.""" + with pytest.raises(ValueError) as exc_info: + LoraTraining.assert_has_lora_layers(dummy_model) + assert "The model does not contain any detectable LoRA layers" in str(exc_info.value) - if n_layers_to_skip > 0: - # First eligible layer should be skipped - assert inference_model.linear1 is original_linear1 - else: - assert isinstance(inference_model.linear1, CustomLinear) - # Check that other eligible layers are replaced - assert isinstance(inference_model.conv1d, CustomLinear) - assert isinstance(inference_model.linear2, CustomLinear) +def test_replace_layers_with_custom(): + """Test replace_layers_with_custom.""" + model = DummyLoRAModel() + n_layers_to_skip_for_backprop = 1 + LoraTraining.replace_layers_with_custom(model, n_layers_to_skip_for_backprop) + # First linear layer should be skipped, second replaced + assert isinstance(model.linear1, nn.Linear) + assert isinstance(model.linear2, CustomLinear) - # 'lora' layers should not be replaced - assert inference_model.lora_layer is original_lora_layer +def test_replace_layers_with_custom_skips_lora_layers(): + """Test replace_layers_with_custom skips LoRA layers.""" -@pytest.mark.parametrize( - "training_args", - [ - {"gradient_accumulation_steps": 2, "max_grad_norm": 1.0}, # dict - SimpleNamespace(gradient_accumulation_steps=2, max_grad_norm=1.0), # namespace - None, # None - ], -) -def test_update_training_parameters(base_lora_training, training_args): - """Test update_training_parameters with different types of training_args.""" - inference_model = base_lora_training.inference_model - optimizer = 
SGD(inference_model.parameters(), lr=0.01) - lr_scheduler = StepLR(optimizer, step_size=1) - loss_fn = nn.MSELoss() + class ModelWithLoraLayer(nn.Module): + """Model with LoRA layer for testing.""" + + def __init__(self): + super().__init__() + self.lora_linear = nn.Linear(10, 10) + self.linear = nn.Linear(10, 10) - base_lora_training.update_training_parameters(optimizer, lr_scheduler, loss_fn, training_args) + def forward(self, x): + """Forward pass.""" + x = self.lora_linear(x) + return self.linear(x) - assert base_lora_training.optimizer is optimizer - assert base_lora_training.lr_scheduler is lr_scheduler - assert base_lora_training.loss_fn is loss_fn + model = ModelWithLoraLayer() + n_layers_to_skip_for_backprop = 0 + LoraTraining.replace_layers_with_custom(model, n_layers_to_skip_for_backprop) + assert isinstance(model.lora_linear, nn.Linear) # Should not be replaced + assert isinstance(model.linear, CustomLinear) # Should be replaced - if training_args is None: - assert base_lora_training.gradient_accumulation_steps == 1 # Default - assert base_lora_training.max_grad_norm is None # Default - else: - assert base_lora_training.gradient_accumulation_steps == 2 - assert base_lora_training.max_grad_norm == 1.0 +def test_replace_layers_with_custom_recursive(): + """Test replace_layers_with_custom with nested modules.""" -def test_lora_training_forward_loss_fn_none(base_lora_training): - """Test the forward method when loss_fn is None.""" - x = torch.tensor([[1.0, 2.0]]) - y = torch.tensor([[0.5, 1.5]]) + class ModelWithNestedModules(nn.Module): + """Model with nested modules for testing.""" - loss, _ = base_lora_training((x, y)) + def __init__(self): + super().__init__() + self.layer1 = nn.Sequential(nn.Linear(10, 20), nn.ReLU(), nn.Linear(20, 10)) - expected_loss = ( - base_lora_training.inference_model(x, labels=y).loss - / base_lora_training.gradient_accumulation_steps - ).item() + def forward(self, x): + """Forward pass.""" + return self.layer1(x) - assert abs(loss.item() - expected_loss) < 1e-6 + model = ModelWithNestedModules() + n_layers_to_skip_for_backprop = 0 + LoraTraining.replace_layers_with_custom(model, n_layers_to_skip_for_backprop) + assert isinstance(model.layer1[0], CustomLinear) + assert isinstance(model.layer1[1], nn.ReLU) # Should not be replaced + assert isinstance(model.layer1[2], CustomLinear) -def test_lora_training_forward_with_loss_fn(base_lora_training): - """Test the forward method when loss_fn is provided.""" +def test_forward_with_loss_fn(): + """Test forward with loss function.""" + model = DummyLoRAModel() loss_fn = nn.MSELoss() - base_lora_training.update_training_parameters(loss_fn=loss_fn) - - x = torch.tensor([[1.0, 2.0]]) - y = torch.tensor([[0.5, 1.5]]) + lora_training = LoraTraining(model, loss_fn=loss_fn) + x = torch.randn(5, 10) + y = torch.randn(5, 10) + loss, _ = lora_training((x, y)) + assert isinstance(loss, torch.Tensor) - outputs = base_lora_training.inference_model(x) - expected_loss = loss_fn(outputs["logits"], y) / base_lora_training.gradient_accumulation_steps - loss, _ = base_lora_training((x, y)) +def test_forward_without_loss_fn_model_returns_loss(): + """Test forward without loss function when model returns loss.""" + model = DummyLoRAModel() + lora_training = LoraTraining(model) + x = torch.randn(5, 10) + y = torch.randn(5, 10) + loss, _ = lora_training((x, y)) + assert isinstance(loss, torch.Tensor) - assert abs(loss.item() - expected_loss.item()) < 1e-6 +def test_forward_without_loss_fn_model_returns_loss_as_attribute(): + """Test 
forward without loss function when model returns loss as attribute.""" -def test_lora_training_forward_no_loss(): - """Test that LoraTraining raises ValueError when model does not return a loss.""" + class DummyLoRAModelReturnsObject(nn.Module): + """Dummy LoRA model returning object with loss.""" - class NoLossInferenceModel(DummyInferenceModel): - """An inference model that does not return a loss.""" + def __init__(self): + super().__init__() + self.lora_a = nn.Parameter(torch.randn(10, 10)) + self.linear1 = nn.Linear(10, 20) + self.linear2 = nn.Linear(20, 10) def forward(self, x, labels=None): - """Forward method that does not return loss.""" - Output = namedtuple("Output", ["something_else"]) - return Output(something_else=torch.tensor(1.0)) + """Forward pass.""" + logits = self.linear2(torch.relu(self.linear1(x))) - no_loss_inference_model = NoLossInferenceModel() - lora_training = LoraTraining(no_loss_inference_model) + class OutputObject: + """Output object containing logits and optional loss.""" - x = torch.tensor([[1.0, 2.0]]) - y = torch.tensor([[0.5, 1.5]]) + def __init__(self, logits, loss=None): + self.logits = logits + self.loss = loss - with pytest.raises(ValueError) as exc_info: - lora_training((x, y)) - assert "The model did not return a loss" in str(exc_info.value) + if labels is not None: + loss = nn.functional.mse_loss(logits, labels) + return OutputObject(logits, loss) + return OutputObject(logits) + model = DummyLoRAModelReturnsObject() + lora_training = LoraTraining(model) + x = torch.randn(5, 10) + y = torch.randn(5, 10) + loss, _ = lora_training((x, y)) + assert isinstance(loss, torch.Tensor) -@pytest.mark.parametrize("enable", [True, False]) -def test_lora_training_toggle_calibrate(base_lora_training, enable): - """Test the toggle_calibrate method.""" - base_lora_training.toggle_calibrate(enable) - assert base_lora_training.calibrate == enable +def test_forward_with_less_than_two_inputs(): + """Test forward with less than two inputs.""" + model = DummyLoRAModel() + lora_training = LoraTraining(model) + x = torch.randn(5, 10) + with pytest.raises(AssertionError) as exc_info: + lora_training((x,)) + assert "Expected at least two inputs" in str(exc_info.value) -@pytest.mark.parametrize("enable", [True, False]) -def test_lora_training_toggle_run_optimizer(base_lora_training, enable): - """Test the toggle_run_optimizer method.""" - base_lora_training.toggle_run_optimizer(enable) - assert base_lora_training.run_optimizer == enable +def test_toggle_calibrate(): + """Test toggle_calibrate.""" + model = DummyLoRAModel() + lora_training = LoraTraining(model) + lora_training.toggle_calibrate(True) + assert lora_training.calibrate is True + lora_training.toggle_calibrate(False) + assert lora_training.calibrate is False -def test_lora_training_forward_with_optimizer(base_lora_training): - """Test the forward method when run_optimizer is True.""" - inference_model = base_lora_training.inference_model - optimizer = SGD(inference_model.parameters(), lr=0.01) - lr_scheduler = StepLR(optimizer, step_size=1) - loss_fn = nn.MSELoss() - base_lora_training.update_training_parameters( - optimizer, - lr_scheduler, - loss_fn, - SimpleNamespace(gradient_accumulation_steps=1, max_grad_norm=1.0), + +def test_set_loss_scaling_factor(): + """Test set_loss_scaling_factor.""" + model = DummyLoRAModel() + lora_training = LoraTraining(model) + lora_training.set_loss_scaling_factor(0.5) + assert lora_training.loss_scaling_factor == 0.5 + + +def test_lora_trainer_init(): + """Test LoraTrainer 
initialization.""" + model = DummyLoRAModel() + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + lora_trainer = LoraTrainer(model, optimizer=optimizer) + assert lora_trainer.lora_training_module is not None + assert lora_trainer.hybrid_model is not None + + +def test_lora_trainer_compile(): + """Test LoraTrainer compile.""" + model = DummyLoRAModel() + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + lora_trainer = LoraTrainer(model, optimizer=optimizer) + inputset = [(torch.randn(5, 10), torch.randn(5, 10))] + # Mock the compile_model method + lora_trainer.hybrid_model.compile_model = MagicMock() + lora_trainer.compile(inputset) + lora_trainer.hybrid_model.compile_model.assert_called_once() + assert lora_trainer.lora_training_module.calibrate is False + + +def test_lora_trainer_train(): + """Test LoraTrainer train.""" + model = DummyLoRAModel() + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + training_args = {"gradient_accumulation_steps": 1, "max_grad_norm": 1.0} + lora_trainer = LoraTrainer(model, optimizer=optimizer, training_args=training_args) + # Mock the hybrid_model's __call__ method + lora_trainer.hybrid_model = MagicMock( + return_value=(torch.tensor(1.0, requires_grad=True), None) + ) + # Create dummy data loader with different batch types + dataset = TensorDataset(torch.randn(2, 5, 10), torch.randn(2, 5, 10)) + train_loader = DataLoader(dataset, batch_size=1) + lora_trainer.train(train_loader, num_epochs=1, fhe="disable") + + +def test_lora_trainer_train_with_lr_scheduler(): + """Test LoraTrainer train with lr_scheduler.""" + model = DummyLoRAModel() + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + lr_scheduler = MagicMock() + training_args = {"gradient_accumulation_steps": 1, "max_grad_norm": 1.0} + lora_trainer = LoraTrainer( + model, optimizer=optimizer, lr_scheduler=lr_scheduler, training_args=training_args ) - base_lora_training.replace_layers_with_custom( - base_lora_training.inference_model, n_layers_to_skip=0 + # Mock the hybrid_model's __call__ method + lora_trainer.hybrid_model = MagicMock( + return_value=(torch.tensor(1.0, requires_grad=True), None) ) - base_lora_training.toggle_run_optimizer(True) + # Create dummy data loader + dataset = TensorDataset(torch.randn(2, 5, 10), torch.randn(2, 5, 10)) + train_loader = DataLoader(dataset, batch_size=1) + lora_trainer.train(train_loader, num_epochs=1) + # Check that lr_scheduler.step() was called + assert lr_scheduler.step.call_count > 0 + + +def test_lora_trainer_save_and_clear_private_info(): + """Test LoraTrainer save_and_clear_private_info.""" + model = DummyLoRAModel() + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + lora_trainer = LoraTrainer(model, optimizer=optimizer, loss_fn=nn.MSELoss()) + lora_trainer.hybrid_model.save_and_clear_private_info = MagicMock() + lora_trainer.save_and_clear_private_info("path/to/model") + lora_trainer.hybrid_model.save_and_clear_private_info.assert_called_once_with("path/to/model") + + +def test_custom_linear_forward_backward(): + """Test CustomLinear forward and backward.""" + weight = torch.randn(20, 10) + bias = torch.randn(20) + custom_linear = CustomLinear(weight, bias) + x = torch.randn(5, 10, requires_grad=True) + y = custom_linear(x) + loss = y.sum() + loss.backward() + assert x.grad is not None + + +def test_custom_linear_weight_transposed(): + """Test CustomLinear with weight transposed.""" + weight = torch.randn(10, 20) + bias = torch.randn(20) + custom_linear = CustomLinear(weight, bias, weight_transposed=True) + x = 
torch.randn(5, 10, requires_grad=True) + y = custom_linear(x) + loss = y.sum() + loss.backward() + assert x.grad is not None - x = torch.tensor([[1.0, 2.0]]) - y = torch.tensor([[0.5, 1.5]]) - # Save initial parameters - initial_params = {name: param.clone() for name, param in inference_model.named_parameters()} +def test_get_remote_names(): + """Test get_remote_names.""" + model = DummyLoRAModel() + LoraTraining.replace_layers_with_custom(model, n_layers_to_skip_for_backprop=0) + remote_names = get_remote_names(model) + assert "linear1.forward_module" in remote_names + assert "linear1.backward_module" in remote_names + assert "linear2.forward_module" in remote_names + assert "linear2.backward_module" in remote_names + assert "lora_a" not in remote_names - # Perform forward pass - _, _ = base_lora_training((x, y)) - # Ensure that only parameters with "lora" in their name have been updated - for name, param in inference_model.named_parameters(): - if "lora" in name: - assert not torch.equal( - initial_params[name], param - ), f"Lora parameter {name} was not updated" - else: - assert torch.equal( - initial_params[name], param - ), f"Non-lora parameter {name} was unexpectedly updated" +def test_get_remote_names_include_embedding_layers(): + """Test get_remote_names with include_embedding_layers.""" + class ModelWithEmbedding(nn.Module): + """Model with embedding layer for testing.""" -def test_lora_training_forward_calibrate(base_lora_training): - """Test the forward method when calibration is enabled.""" - inference_model = base_lora_training.inference_model - base_lora_training.toggle_calibrate(True) + def __init__(self): + super().__init__() + self.embedding = nn.Embedding(10, 10) + self.linear = nn.Linear(10, 10) - x = torch.tensor([[1.0, 2.0]]) - y = torch.tensor([[0.5, 1.5]]) + def forward(self, x): + """Forward pass.""" + x = self.embedding(x) + x = self.linear(x) + return x - _, _ = base_lora_training((x, y)) + model = ModelWithEmbedding() + remote_names = get_remote_names(model, include_embedding_layers=True) + assert "embedding" in remote_names + assert "linear" in remote_names - # Ensure that gradients are zeroed - for param in inference_model.parameters(): - if param.grad is not None: - assert torch.all(param.grad == 0) +def test_get_remote_names_skips_lm_head_when_excluded(): + """Test get_remote_names skips lm_head when excluded.""" -@pytest.mark.parametrize("weight_transposed", [False, True]) -def test_forward_module_linear(weight_transposed): - """Test ForwardModuleLinear.""" - weight = torch.tensor([[1.0, 2.0], [3.0, 4.0]]) - bias = torch.tensor([0.5, -0.5]) - module = ForwardModuleLinear(weight, bias, weight_transposed=weight_transposed) + class ModelWithLMHead(nn.Module): + """Model with lm_head for testing.""" - input_tensor = torch.tensor([[1.0, 0.0], [0.0, 1.0]]) - output = module(input_tensor) + def __init__(self): + super().__init__() + self.lm_head = nn.Linear(10, 10) + self.linear = nn.Linear(10, 10) - if weight_transposed: - expected_output = input_tensor @ weight + bias - else: - expected_output = input_tensor @ weight.t() + bias + def forward(self, x): + """Forward pass.""" + return self.linear(x) - assert torch.allclose(output, expected_output) + model = ModelWithLMHead() + remote_names = get_remote_names(model, include_embedding_layers=False) + assert "lm_head" not in remote_names + assert "linear" in remote_names -@pytest.mark.parametrize("weight_transposed", [False, True]) -def test_backward_module_linear(weight_transposed): - """Test BackwardModuleLinear.""" 
- weight = torch.tensor([[1.0, 2.0], [3.0, 4.0]]) - module = BackwardModuleLinear(weight, weight_transposed=weight_transposed) +def test_replace_layers_with_transformer_conv1d(monkeypatch): + """Test replace_layers_with_custom with TransformerConv1D.""" - grad_output = torch.tensor([[1.0, 0.0], [0.0, 1.0]]) - grad_input = module(grad_output) + class MockTransformerConv1D(nn.Module): + """Mock TransformerConv1D module for testing.""" - if weight_transposed: - expected_grad_input = grad_output @ weight.t() - else: - expected_grad_input = grad_output @ weight + def __init__(self, in_features, out_features): + super().__init__() + self.in_features = in_features + self.out_features = out_features + self.weight = nn.Parameter(torch.randn(out_features, in_features)) + self.bias = nn.Parameter(torch.randn(out_features)) - assert torch.allclose(grad_input, expected_grad_input) + def forward(self, x): + """Forward pass.""" + return x @ self.weight.t() + self.bias + # Patch TransformerConv1D and LINEAR_LAYERS in the lora module + monkeypatch.setattr("concrete.ml.torch.lora.TransformerConv1D", MockTransformerConv1D) + monkeypatch.setattr("concrete.ml.torch.lora.LINEAR_LAYERS", (nn.Linear, MockTransformerConv1D)) -@pytest.mark.parametrize("weight_transposed", [False, True]) -def test_custom_linear(weight_transposed): - """Test the CustomLinear module.""" - weight = torch.tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True) - bias = torch.tensor([0.5, -0.5], requires_grad=True) - module = CustomLinear(weight, bias, weight_transposed=weight_transposed) + class ModelWithConv1D(nn.Module): + """Model with Conv1D layer for testing.""" - input_tensor = torch.tensor([[1.0, 0.0]], requires_grad=True) - output = module(input_tensor) + def __init__(self): + super().__init__() + self.conv1d = MockTransformerConv1D(10, 10) - if weight_transposed: - expected_output = input_tensor @ weight + bias - else: - expected_output = input_tensor @ weight.t() + bias + def forward(self, x): + """Forward pass.""" + return self.conv1d(x) - assert torch.allclose(output, expected_output) + model = ModelWithConv1D() + n_layers_to_skip_for_backprop = 0 + LoraTraining.replace_layers_with_custom(model, n_layers_to_skip_for_backprop) + assert isinstance(model.conv1d, CustomLinear) - # Test backward - output.sum().backward() - if weight_transposed: - expected_grad_input = torch.ones_like(output) @ weight.t() - else: - expected_grad_input = torch.ones_like(output) @ weight - assert input_tensor.grad is not None and torch.allclose(input_tensor.grad, expected_grad_input) +def test_forward_backward_module(): + """Test the ForwardBackwardModule autograd function.""" + weight = torch.randn(20, 10) + bias = torch.randn(20) + forward_module = ForwardModuleLinear(weight, bias) + backward_module = BackwardModuleLinear(weight) + x = torch.randn(5, 10) + y = forward_module(x) + grad_output = torch.randn_like(y) + grad_input = backward_module(grad_output) + assert grad_input.shape == x.shape -@pytest.mark.parametrize("weight_transposed", [False, True]) -def test_forward_backward_module(weight_transposed): - """Test the ForwardBackwardModule.""" - weight = torch.tensor([[1.0, 2.0], [3.0, 4.0]]) - bias = torch.tensor([0.5, -0.5]) - forward_module = ForwardModuleLinear(weight, bias, weight_transposed=weight_transposed) - backward_module = BackwardModuleLinear(weight, weight_transposed=weight_transposed) +def test_lora_training_forward_with_loss_fn_and_attention_mask(): + """Test LoraTraining forward using a custom loss_fn and attention_mask.""" - 
input_tensor = torch.tensor([[1.0, 0.0]], requires_grad=True) - output = ForwardBackwardModule.apply(input_tensor, forward_module, backward_module) + class ModelWithAttention(nn.Module): + """Model that supports attention_mask for testing.""" - if weight_transposed: - expected_output = input_tensor @ weight + bias - expected_grad_input = torch.ones_like(output) @ weight.t() - else: - expected_output = input_tensor @ weight.t() + bias - expected_grad_input = torch.ones_like(output) @ weight + def __init__(self): + super().__init__() + self.lora_a = nn.Parameter(torch.randn(10, 10)) + self.linear = nn.Linear(10, 10) - assert torch.allclose(output, expected_output) + def forward(self, x, attention_mask=None): + """Forward pass.""" + if attention_mask is not None: + return {"logits": self.linear(x + attention_mask)} + return {"logits": self.linear(x)} - # Test backward - output.sum().backward() + # Define a simple loss function + def simple_loss_fn(logits, labels): + return nn.MSELoss()(logits, labels) - assert input_tensor.grad is not None and torch.allclose(input_tensor.grad, expected_grad_input) + model = ModelWithAttention() + # Instantiate LoraTraining with a custom loss_fn + lora_training = LoraTraining(model, loss_fn=simple_loss_fn) -def test_get_remote_names(): - """Test get_remote_names function.""" + x = torch.randn(5, 10) + y = torch.randn(5, 10) + attention_mask = torch.randint(0, 2, (5, 10)) - class TestModel(torch.nn.Module): - """Test model for get_remote_names test.""" + # Call forward with (input_ids, labels, attention_mask) + loss, _ = lora_training((x, y, attention_mask)) + assert isinstance(loss, torch.Tensor) + + +def test_lora_training_forward_with_additional_inputs(): + """Test LoraTraining forward with additional inputs.""" + + class ModelWithAttention(nn.Module): + """Model with attention input for testing.""" def __init__(self): super().__init__() - self.linear = torch.nn.Linear(10, 10) - self.conv1d = TransformerConv1D(10, 10) - self.embedding = torch.nn.Embedding(10, 10) - self.lm_head = torch.nn.Linear(10, 10) - self.lora_layer = torch.nn.Linear(10, 10) - self.lora_layer_name = "lora_layer" + self.lora_a = nn.Parameter(torch.randn(10, 10)) + self.linear = nn.Linear(10, 10) + + def forward(self, x, attention_mask=None, labels=None): + """Forward pass with an attention mask.""" + # Just treat the attention_mask as an extra input + # and add it to x before passing through linear. 
+ if attention_mask is not None: + logits = self.linear(x + attention_mask) + else: + logits = self.linear(x) + + if labels is not None: + loss = nn.functional.mse_loss(logits, labels) + return {"loss": loss} + return {"logits": logits} + + model = ModelWithAttention() + lora_training = LoraTraining(model) + x = torch.randn(5, 10) + y = torch.randn(5, 10) + attention_mask = torch.randint(0, 2, (5, 10)) - def forward(self, x): - """Forward method.""" - return self.lm_head(self.linear(x)) + loss, _ = lora_training((x, y, attention_mask)) + assert isinstance(loss, torch.Tensor) - model = TestModel() +def test_lora_training_forward_with_no_loss_fn_and_no_labels(): + """Test LoraTraining when model returns loss=None and no loss_fn provided.""" + model = DummyLoRAModel() lora_training = LoraTraining(model) - remote_names = get_remote_names(lora_training) - expected_names = [ - "inference_model.linear", - "inference_model.conv1d.forward_module", - "inference_model.conv1d.backward_module", - ] - - assert set(remote_names) == set(expected_names) - - # Test with include_embedding_layers=True - remote_names_with_embeddings = get_remote_names(lora_training, include_embedding_layers=True) - expected_names_with_embeddings = [ - "inference_model.linear", - "inference_model.conv1d.forward_module", - "inference_model.conv1d.backward_module", - # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4609 - "inference_model.embedding", - "inference_model.lm_head.forward_module", - "inference_model.lm_head.backward_module", - ] - assert set(remote_names_with_embeddings) == set(expected_names_with_embeddings) - - -def test_lora_without_transformers(): - """ - Test the lora.py module when the transformers library is not installed. - """ - - # Save the original transformers module if it's already imported - transformers_original = sys.modules.get("transformers", None) - - # Mock the transformers import to simulate it being unavailable - with mock.patch.dict("sys.modules", {"transformers": None}): - # Reload the lora module to apply the mocked transformers import - if "concrete.ml.torch.lora" in sys.modules: - del sys.modules["concrete.ml.torch.lora"] - import concrete.ml.torch.lora as lora # pylint: disable=R0402,C0415 - - # Ensure that TransformerConv1D is None - assert lora.TransformerConv1D is None - - # Create a simple model without any Conv1D layers - model = torch.nn.Sequential( - torch.nn.Linear(10, 20), - torch.nn.ReLU(), - torch.nn.Linear(20, 5), - ) - - # Initialize LoraTraining with the model - lora_training = lora.LoraTraining(model) - - # Check that layers have been replaced with CustomLinear - replaced_layers = [] - for name, module in lora_training.inference_model.named_modules(): - if isinstance(module, lora.CustomLinear): - replaced_layers.append(name) - - # Assert that CustomLinear layers have been added - assert len(replaced_layers) > 0, "No layers were replaced with CustomLinear." - - # Prepare input data - x = torch.randn(3, 10) # Batch size 3, input size 10 - y = torch.randint(0, 5, (3,)) # Batch size 3, number of classes 5 - - # Define a simple loss function - loss_fn = torch.nn.CrossEntropyLoss() - - # Update training parameters - lora_training.update_training_parameters(loss_fn=loss_fn) - - # Perform a forward pass - loss, grad_norm = lora_training((x, y)) - - # Check that loss is computed and gradients are updated - assert loss.requires_grad, "Loss does not require gradients." - assert loss.item() > 0, "Loss should be greater than zero." 
- - # Since optimizer is not set, grad_norm should be None - assert grad_norm is None, "Gradient norm should be None when optimizer is not set." - - # Restore the original transformers module after the test - if transformers_original is not None: - sys.modules["transformers"] = transformers_original - elif "transformers" in sys.modules: - del sys.modules["transformers"] + x = torch.randn(5, 10) + y = None # No labels provided + with pytest.raises(ValueError) as exc_info: + lora_training((x, y)) + assert "The model did not return a loss." in str(exc_info.value) + + +def test_lora_trainer_train_with_various_batch_types(): + """Test LoraTrainer.train with batches of different types.""" + model = DummyLoRAModel() + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + lora_trainer = LoraTrainer(model, optimizer=optimizer) + + # Mock the hybrid_model's __call__ method + lora_trainer.hybrid_model = MagicMock( + return_value=(torch.tensor(1.0, requires_grad=True), None) + ) + + class DictDataset(Dataset): + """Dataset with dict items.""" + + def __init__(self, data): + self.data = data + + def __len__(self): + return len(self.data) + + def __getitem__(self, idx): + return self.data[idx] + + class ListDataset(Dataset): + """Dataset with list items.""" + + def __init__(self, data): + self.data = data + + def __len__(self): + return len(self.data) + + def __getitem__(self, idx): + return self.data[idx] + + class NonTensorDataset(Dataset): + """Dataset with non-tensor items.""" + + def __init__(self, data): + self.data = data + + def __len__(self): + return len(self.data) + + def __getitem__(self, idx): + return self.data[idx] + + # Test with dict batch + dataset_dict = [{"input": torch.randn(5, 10), "label": torch.randn(5, 10)} for _ in range(2)] + train_loader_dict: DataLoader = DataLoader(DictDataset(dataset_dict), batch_size=1) + lora_trainer.train(train_loader_dict, num_epochs=1) + + # Test with list/tuple batch + dataset_list = [(torch.randn(5, 10), torch.randn(5, 10)) for _ in range(2)] + train_loader_list: DataLoader = DataLoader(ListDataset(dataset_list), batch_size=1) + lora_trainer.train(train_loader_list, num_epochs=1) + + # Test with single tensor batch + dataset_single = TensorDataset(torch.stack([torch.randn(5, 10) for _ in range(2)])) + train_loader_single: DataLoader = DataLoader(dataset_single, batch_size=1) + lora_trainer.train(train_loader_single, num_epochs=1) + + # Test with single non-tensor item batch + dataset_non_tensor = NonTensorDataset( + [42 for _ in range(2)] + ) # Using integers as non-tensor data + train_loader_non_tensor: DataLoader = DataLoader(dataset_non_tensor, batch_size=1) + lora_trainer.train(train_loader_non_tensor, num_epochs=1) + + +def test_lora_trainer_train_with_gradient_accumulation(): + """Test LoraTrainer.train with gradient accumulation steps.""" + model = DummyLoRAModel() + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + training_args = {"gradient_accumulation_steps": 2, "max_grad_norm": 1.0} + lora_trainer = LoraTrainer(model, optimizer=optimizer, training_args=training_args) + # Mock the hybrid_model's __call__ method + lora_trainer.hybrid_model = MagicMock( + return_value=(torch.tensor(1.0, requires_grad=True), None) + ) + # Create dummy data loader + dataset = TensorDataset(torch.randn(4, 5, 10), torch.randn(4, 5, 10)) + train_loader: DataLoader = DataLoader(dataset, batch_size=1) + lora_trainer.train(train_loader, num_epochs=1) + + +def test_get_remote_names_with_lora_in_name(): + """Test get_remote_names skips modules with 
'lora' in name.""" + + class ModelWithLoraInName(nn.Module): + """Model with LoRA layer for testing.""" + + def __init__(self): + super().__init__() + self.lora_linear = nn.Linear(10, 10) + self.linear = nn.Linear(10, 10) + + def forward(self, x): + """Forward pass with lora_linear.""" + x = self.lora_linear(x) + x = self.linear(x) + return x + + model = ModelWithLoraInName() + remote_names = get_remote_names(model) + assert "lora_linear" not in remote_names + assert "linear" in remote_names + + +def test_lora_training_init_validates_model_signature(): + """Test LoraTraining initialization validates model's forward signature.""" + + class ModelWithoutLabels(nn.Module): + """Model without labels parameter in forward.""" + + def __init__(self): + super().__init__() + self.lora_a = nn.Parameter(torch.randn(10, 10)) + self.linear = nn.Linear(10, 10) + + def forward(self, x): # No labels parameter + """Forward pass without labels parameter.""" + return {"logits": self.linear(x)} + + model = ModelWithoutLabels() + + with pytest.raises(ValueError) as exc_info: + LoraTraining(model, loss_fn=None) # No loss_fn provided + assert "must accept a 'labels' parameter" in str(exc_info.value) diff --git a/use_case_examples/lora_finetuning/GPT2FineTuneHybrid.ipynb b/use_case_examples/lora_finetuning/GPT2FineTuneHybrid.ipynb index c9eada04d..51a99b21b 100644 --- a/use_case_examples/lora_finetuning/GPT2FineTuneHybrid.ipynb +++ b/use_case_examples/lora_finetuning/GPT2FineTuneHybrid.ipynb @@ -5,9 +5,9 @@ "id": "dfccd8e6", "metadata": {}, "source": [ - "# Fine-Tuning GPT-2 on Encrypted Data with LoRA and Concrete-ML\n", + "# Fine-Tuning GPT-2 on Encrypted Data with LoRA and Concrete ML\n", "\n", - "In this notebook, we perform fine-tuning of a GPT-2 model using LoRA and Concrete-ML." + "In this notebook, we perform fine-tuning of a GPT-2 model using LoRA and Concrete ML." 
] }, { @@ -111,7 +111,15 @@ "execution_count": 5, "id": "5ac49f9d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LoRA layers detected in the model.\n" + ] + } + ], "source": [ "# Set up LoRA training\n", "lora_training = LoraTraining(peft_model)" @@ -126,7 +134,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "656e3f624a7f4c879b46129e841e4db1", + "model_id": "9775e413ec264b2eb14ee53dbc381474", "version_major": 2, "version_minor": 0 }, @@ -303,9 +311,8 @@ "\n", "trainer.create_optimizer_and_scheduler(num_training_steps=max_steps)\n", "\n", - "lora_training.update_training_parameters(\n", - " trainer.optimizer, trainer.lr_scheduler, causal_lm_loss, training_args\n", - ")" + "lr_scheduler = trainer.lr_scheduler\n", + "optimizer = trainer.optimizer" ] }, { @@ -338,9 +345,13 @@ "outputs": [], "source": [ "# Prepare input data for calibration\n", - "input_tensor = torch.randint(0, tokenizer.vocab_size, (PER_DEVICE_TRAIN_BATCH_SIZE, BLOCK_SIZE))\n", - "label_tensor = torch.randint(0, tokenizer.vocab_size, (PER_DEVICE_TRAIN_BATCH_SIZE, BLOCK_SIZE))\n", - "attention_mask = torch.ones((PER_DEVICE_TRAIN_BATCH_SIZE, BLOCK_SIZE))\n", + "input_tensor = torch.randint(\n", + " 0, tokenizer.vocab_size, (PER_DEVICE_TRAIN_BATCH_SIZE, BLOCK_SIZE), dtype=torch.long\n", + ")\n", + "label_tensor = torch.randint(\n", + " 0, tokenizer.vocab_size, (PER_DEVICE_TRAIN_BATCH_SIZE, BLOCK_SIZE), dtype=torch.long\n", + ")\n", + "attention_mask = torch.ones((PER_DEVICE_TRAIN_BATCH_SIZE, BLOCK_SIZE), dtype=torch.long)\n", "\n", "inputset = (input_tensor, label_tensor, attention_mask)" ] @@ -373,12 +384,11 @@ "\n", " # Training loop\n", " peft_model.train()\n", - " lora_training.run_optimizer = True\n", " total_epochs = int(training_args.num_train_epochs)\n", " epoch_pbar = tqdm(total=total_epochs, desc=\"Training Progress\", position=0)\n", "\n", " total_batched_samples = 0\n", - " epoch_losses = [] # List to store the loss for each epoch\n", + " epoch_losses = []\n", "\n", " # Generate text before the first epoch\n", " print(\"Generating text before the first epoch:\\n\")\n", @@ -392,22 +402,29 @@ " grad_norms = []\n", "\n", " for _, batch in enumerate(train_dataloader):\n", - "\n", " total_batched_samples += 1\n", - "\n", " batch = {k: v.to(device) for k, v in batch.items()}\n", "\n", + " # Zero the gradients\n", + " optimizer.zero_grad()\n", + "\n", + " # Forward pass\n", " loss, grad_norm = hybrid_model(\n", " (batch[\"input_ids\"], batch[\"labels\"], batch[\"attention_mask\"]), fhe=fhe\n", " )\n", "\n", - " total_loss += loss.item()\n", + " # Optimizer step\n", + " optimizer.step()\n", "\n", + " # Learning rate scheduler step\n", + " lr_scheduler.step()\n", + "\n", + " total_loss += loss.item()\n", " if grad_norm is not None:\n", " grad_norms.append(grad_norm)\n", "\n", " # Get current learning rate\n", - " current_lr = lora_training.lr_scheduler.get_last_lr()[0]\n", + " current_lr = lr_scheduler.get_last_lr()[0]\n", "\n", " # Get last grad norm\n", " current_grad_norm = grad_norms[-1] if grad_norms else None\n", @@ -846,7 +863,7 @@ "tokenizer.parallelism = False\n", "\n", "# Train the model using FHE simulation\n", - "train_custom_model(hybrid_model, train_dataloader, training_args, tokenizer, fhe=\"simulate\")" + "train_custom_model(hybrid_model, train_dataloader, training_args, tokenizer, fhe=\"disable\")" ] }, { @@ -854,7 +871,7 @@ "id": "65d448c8", "metadata": {}, "source": [ - "Note that our goal is to showcase the 
use of FHE for fine-tuning a model. The a dataset used contains 68 examples for a total of 2386 tokens. This a very small dataset which does not allow the model to learn a lot of information and output very interesting results."
+    "Note that our goal is to showcase the use of FHE for encrypted fine-tuning. The dataset consists of 68 examples and a total of 2,386 tokens, which is relatively small. Despite this limited size, the fine-tuned model still manages to produce interesting results."
   ]
  },
  {
diff --git a/use_case_examples/lora_finetuning/LLamaFineTuning.ipynb b/use_case_examples/lora_finetuning/LLamaFineTuning.ipynb
new file mode 100644
index 000000000..7ee8a6810
--- /dev/null
+++ b/use_case_examples/lora_finetuning/LLamaFineTuning.ipynb
@@ -0,0 +1,345 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Fine-Tuning Llama-3.2 with LoRA and FHE using `LoraTrainer`\n",
+    "\n",
+    "This notebook demonstrates how to fine-tune a Llama-3.2-1B model using LoRA (Low-Rank Adaptation) with Fully Homomorphic Encryption (FHE). We leverage the `LoraTrainer` API from the `concrete.ml.torch.lora` library to simplify the process.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import random\n",
+    "import shutil\n",
+    "from pathlib import Path\n",
+    "\n",
+    "import numpy as np\n",
+    "import torch\n",
+    "from datasets import load_dataset\n",
+    "from peft import LoraConfig, get_peft_model\n",
+    "from transformers import (\n",
+    "    AutoModelForCausalLM,\n",
+    "    AutoTokenizer,\n",
+    "    DataCollatorForLanguageModeling,\n",
+    "    Trainer,\n",
+    "    TrainingArguments,\n",
+    ")\n",
+    "from utils_lora import generate_and_print\n",
+    "\n",
+    "# Import LoraTrainer from the provided library\n",
+    "from concrete.ml.torch.lora import LoraTrainer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Set seed for reproducibility\n",
+    "SEED = 0\n",
+    "random.seed(SEED)\n",
+    "np.random.seed(SEED)\n",
+    "torch.manual_seed(SEED)\n",
+    "if torch.cuda.is_available():\n",
+    "    torch.cuda.manual_seed_all(SEED)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the model and tokenizer\n",
+    "model_name = \"meta-llama/Llama-3.2-1B\"\n",
+    "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
+    "model = AutoModelForCausalLM.from_pretrained(model_name)\n",
+    "\n",
+    "# Ensure the tokenizer has a pad token\n",
+    "if tokenizer.pad_token is None:\n",
+    "    tokenizer.pad_token = tokenizer.eos_token\n",
+    "model.config.pad_token_id = model.config.eos_token_id\n",
+    "\n",
+    "# Freeze the original model's weights\n",
+    "for param in model.parameters():\n",
+    "    param.requires_grad = False"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Initial generation with base model:\n",
+      "from concrete.ml.sklearn import LogisticRegression\n",
+      "\n",
+      "model = LogisticRegression( eta=0.1, max_iter=1000, random_state=42)\n",
+      "None\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Print the initial generation with the base model\n",
+    "PROMPT = \"from concrete.ml.sklearn import LogisticRegression\\n\\nmodel = LogisticRegression(\"\n",
+    "print(\"Initial generation with base model:\")\n",
+    "print(generate_and_print(PROMPT, model, tokenizer, seed=SEED))"
+   ]
+  },
+  {
+   "cell_type": "code",
+ "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Apply LoRA configuration\n", + "peft_config = LoraConfig(\n", + " r=8,\n", + " lora_alpha=32,\n", + " lora_dropout=0.01,\n", + " bias=\"none\",\n", + " task_type=\"CAUSAL_LM\",\n", + " target_modules=\"all-linear\",\n", + ")\n", + "peft_model = get_peft_model(model, peft_config)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" + ] + } + ], + "source": [ + "# Load the dataset and tokenize it\n", + "dataset = load_dataset(\"json\", data_files=\"data_finetune/dataset.jsonl\", split=\"train\")\n", + "\n", + "\n", + "def tokenize_function(examples):\n", + " return tokenizer(examples[\"text\"], padding=\"longest\", truncation=True)\n", + "\n", + "\n", + "tokenized_dataset = dataset.map(tokenize_function, batched=True)\n", + "data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Define training arguments\n", + "EPOCHS = 10\n", + "PER_DEVICE_TRAIN_BATCH_SIZE = 4\n", + "training_args = TrainingArguments(\n", + " output_dir=\"./checkpoints\",\n", + " num_train_epochs=EPOCHS,\n", + " per_device_train_batch_size=PER_DEVICE_TRAIN_BATCH_SIZE,\n", + " gradient_accumulation_steps=1,\n", + " save_total_limit=1,\n", + " use_cpu=True,\n", + " learning_rate=2e-4,\n", + " lr_scheduler_type=\"linear\",\n", + " seed=SEED,\n", + " data_seed=SEED,\n", + " warmup_steps=10,\n", + " weight_decay=0.01,\n", + " prediction_loss_only=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LoRA layers detected in the model.\n" + ] + } + ], + "source": [ + "# Create optimizer and scheduler using HuggingFace's Trainer\n", + "hf_trainer = Trainer(\n", + " model=peft_model,\n", + " args=training_args,\n", + " train_dataset=tokenized_dataset,\n", + " data_collator=data_collator,\n", + ")\n", + "train_dataloader = hf_trainer.get_train_dataloader()\n", + "hf_trainer.create_optimizer_and_scheduler(num_training_steps=len(train_dataloader) * EPOCHS)\n", + "\n", + "optimizer = hf_trainer.optimizer\n", + "lr_scheduler = hf_trainer.lr_scheduler\n", + "\n", + "\n", + "# Define a causal LM loss function\n", + "def causal_lm_loss(logits, labels, ignore_index=-100):\n", + " shift_logits = logits[..., :-1, :].contiguous()\n", + " shift_labels = labels[..., 1:].contiguous()\n", + " shift_logits = shift_logits.view(-1, shift_logits.size(-1))\n", + " shift_labels = shift_labels.view(-1)\n", + " loss = torch.nn.functional.cross_entropy(\n", + " shift_logits, shift_labels, ignore_index=ignore_index, reduction=\"mean\"\n", + " )\n", + " return loss\n", + "\n", + "\n", + "# Prepare input data for calibration\n", + "lengths = [len(item[\"input_ids\"]) for item in tokenized_dataset]\n", + "if not all(length == lengths[0] for length in lengths):\n", + " raise ValueError(\"All examples must have the same length for calibration.\")\n", + "BLOCK_SIZE = lengths[0]\n", + "\n", + 
"input_tensor = torch.randint(\n", + " 0, tokenizer.vocab_size, (PER_DEVICE_TRAIN_BATCH_SIZE, BLOCK_SIZE), dtype=torch.long\n", + ")\n", + "label_tensor = torch.randint(\n", + " 0, tokenizer.vocab_size, (PER_DEVICE_TRAIN_BATCH_SIZE, BLOCK_SIZE), dtype=torch.long\n", + ")\n", + "attention_mask = torch.ones((PER_DEVICE_TRAIN_BATCH_SIZE, BLOCK_SIZE), dtype=torch.long)\n", + "inputset = (input_tensor, label_tensor, attention_mask)\n", + "\n", + "# Initialize LoraTrainer\n", + "training_args_dict = vars(training_args)\n", + "lora_trainer = LoraTrainer(\n", + " model=peft_model,\n", + " optimizer=optimizer,\n", + " loss_fn=causal_lm_loss,\n", + " lr_scheduler=lr_scheduler,\n", + " training_args=training_args_dict,\n", + " n_layers_to_skip_for_backprop=3,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Compile the model with FHE\n", + "lora_trainer.compile(inputset, n_bits=16)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting training using LoraTrainer...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Training: 100%|██████████| 10/10 [22:19<00:00, 133.98s/epoch, Epoch=10, Avg Loss=0.0795, FHE Mode=disable]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training completed. Final Avg Loss: 0.0795, FHE Mode: disable\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "# Train the model using LoraTrainer\n", + "print(\"Starting training using LoraTrainer...\")\n", + "lora_trainer.train(train_dataloader, num_epochs=EPOCHS, fhe=\"disable\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Original model generation:\n", + "from concrete.ml.sklearn import LogisticRegression\n", + "\n", + "model = LogisticRegression( eta=0.1, max_iter=1000, random_state=42)\n", + "None\n", + "Fine-tuned model generation:\n", + "from concrete.ml.sklearn import LogisticRegression\n", + "\n", + "model = LogisticRegression( n_bits=7, max_iter=50)\n", + "None\n" + ] + } + ], + "source": [ + "# Compare generation before and after fine-tuning\n", + "peft_model.disable_adapter_layers()\n", + "print(\"Original model generation:\")\n", + "print(generate_and_print(PROMPT, peft_model, tokenizer, seed=SEED))\n", + "\n", + "peft_model.enable_adapter_layers()\n", + "print(\"Fine-tuned model generation:\")\n", + "print(generate_and_print(PROMPT, peft_model, tokenizer, seed=SEED))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Save the fine-tuned model\n", + "save_path = Path(\"deployment/llama_lora_finetuned\")\n", + "if save_path.is_dir() and any(save_path.iterdir()):\n", + " shutil.rmtree(save_path)\n", + "lora_trainer.save_and_clear_private_info(save_path)\n", + "\n", + "print(\"Model saved to:\", save_path)" + ] + } + ], + "metadata": { + "execution": { + "timeout": 10800 + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/use_case_examples/lora_finetuning/Makefile b/use_case_examples/lora_finetuning/Makefile index 8942d2e22..ed6edcf86 100644 --- a/use_case_examples/lora_finetuning/Makefile +++ b/use_case_examples/lora_finetuning/Makefile @@ -8,3 +8,6 @@ run_example: one one: @$(TIME_NB) GPT2FineTuneHybrid.ipynb + +two: + @$(TIME_NB) 
LLamaFineTuning.ipynb
\ No newline at end of file
diff --git a/use_case_examples/lora_finetuning/README.md b/use_case_examples/lora_finetuning/README.md
index cf16d2176..a1513298f 100644
--- a/use_case_examples/lora_finetuning/README.md
+++ b/use_case_examples/lora_finetuning/README.md
@@ -1,16 +1,19 @@
-# Privacy Preserving GPT2 LoRA
+# Privacy Preserving Language Models LoRA Fine-tuning

-This project demonstrates how to fine-tune GPT-2 using Low-Rank Adaptation (LoRA) weights with Fully Homomorphic Encryption (FHE). The goal is to train a specialized model in a privacy-preserving manner, with minimal memory requirements.
+This use case demonstrates how to fine-tune language models (GPT-2 and LLaMA) using Low-Rank Adaptation (LoRA) weights with Fully Homomorphic Encryption (FHE). The goal is to train specialized models in a privacy-preserving manner, with minimal memory requirements.

 ## Overview

-Fine-tuning large language models typically requires access to sensitive data, which can raise privacy concerns. By leveraging FHE, we can perform computations on encrypted data, ensuring that the data remains private throughout the training process. In this approach, the LoRA weights are only known to the user who owns the data and the memory hungry foundation model remains on the server.
+Fine-tuning large language models typically requires access to sensitive data, which can raise privacy concerns. By leveraging FHE, the foundation model's computations are performed on encrypted data, ensuring that the data remains private throughout the training process. The LoRA weights are kept in the clear on the client side.

 ## Key Features

-- **LoRA Fine-Tuning**: Fine-tune GPT-2 by adapting low-rank weights.
-- **Hybrid Model**: Combine traditional and encrypted computations for optimal performance.
-- **Low Memory Requirements**: Minimal client-side memory needed for LoRA weights.
+- **LoRA Fine-Tuning**: Fine-tune language models by adapting low-rank weights
+- **Hybrid Model**: Combine encrypted foundation-model computations on the server with clear LoRA weights on the client for optimal performance
+- **Low Memory Requirements**: Minimal client-side memory needed for LoRA weights
+- **Multiple Approaches**:
+  - Custom training implementation for GPT-2
+  - Simplified API-based approach for LLaMA using the `LoraTrainer`

 ## Setup

@@ -26,9 +29,28 @@ pip install -r requirements.txt

 ## Usage

-### Prepare the Dataset
+### Available Notebooks

-Replace the data-set in the `data_finetune` directory to the one you want to use for fine-tuning.
+The repository includes two example notebooks:
+
+1. **GPT2FineTuneHybrid.ipynb**:
+
+   - Uses a custom training implementation
+   - Fine-tunes GPT-2 on a small Q&A data-set about FHE
+   - Shows low-level control over the training process
+
+1. **LLamaFineTuning.ipynb**:
+
+   - Uses Concrete ML's `LoraTrainer` API for simplified implementation
+   - Fine-tunes LLaMA on Concrete ML code examples
+   - Shows how to use the high-level API for encrypted fine-tuning (a minimal usage sketch is included below)
+
+### Prepare the data-set
+
+Each notebook includes its own data-set:
+
+- GPT-2 uses a small Q&A data-set about FHE in `data_finetune/what_is_fhe.txt`
+- LLaMA uses Concrete ML code examples in `data_finetune/dataset.jsonl`

 ### Run the Fine-Tuning Script

@@ -47,14 +69,18 @@ In a deployment or production scenario, the model can be fine-tuned as follows:

 ## Results

-The fine-tuned model can generate specialized text based on the provided data-set while ensuring data privacy through FHE.
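[Editor's note: to make the `LoraTrainer` flow referenced in the README concrete, here is a minimal, self-contained sketch. It is an illustration only, not code from this PR: the tiny `DummyLoRA` model, its shapes, and the hyper-parameters are hypothetical stand-ins for the PEFT-wrapped Llama model prepared in the notebook, while the `LoraTrainer` calls (`compile`, `train`, `save_and_clear_private_info`) mirror the API defined in `src/concrete/ml/torch/lora.py` earlier in this diff.]

```python
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

from concrete.ml.torch.lora import LoraTrainer


class DummyLoRA(nn.Module):
    """Toy stand-in for a PEFT model: a 'lora_a' parameter marks it as LoRA-enabled."""

    def __init__(self):
        super().__init__()
        self.lora_a = nn.Parameter(torch.randn(10, 10))
        self.linear1 = nn.Linear(10, 20)
        self.linear2 = nn.Linear(20, 10)

    def forward(self, x, labels=None):
        logits = self.linear2(torch.relu(self.linear1(x)))
        if labels is not None:
            # Returning a dict with a "loss" key lets LoraTraining run without a loss_fn
            return {"loss": nn.functional.mse_loss(logits, labels)}
        return {"logits": logits}


model = DummyLoRA()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Wraps the model in LoraTraining and a HybridFHEModel for the remote linear layers
trainer = LoraTrainer(model, optimizer=optimizer)

# Calibrate and compile the server-side circuits on a representative input set
inputset = (torch.randn(4, 10), torch.randn(4, 10))
trainer.compile(inputset, n_bits=8)

# Train; fhe can be "disable", "simulate" or "execute"
loader = DataLoader(TensorDataset(torch.randn(8, 10), torch.randn(8, 10)), batch_size=4)
trainer.train(loader, num_epochs=1, fhe="disable")

# Remove the server-side weights before sharing the fine-tuned artifacts
trainer.save_and_clear_private_info("deployment/dummy_lora")
```

The same loop runs under FHE simulation with `fhe="simulate"`, or with encrypted execution of the remote linear layers with `fhe="execute"`.

[Editor's note: regarding the data format mentioned above, `data_finetune/dataset.jsonl` (excerpted further below in this diff) stores one JSON object per line, each with a single `text` field holding a Concrete ML code fragment. A quick way to inspect it, again as an illustrative snippet rather than part of the PR:]

```python
import json
from pathlib import Path

# Path as added by this PR; each line is an independent JSON object
path = Path("use_case_examples/lora_finetuning/data_finetune/dataset.jsonl")
records = [json.loads(line) for line in path.read_text().splitlines() if line.strip()]

print(len(records))        # 46 examples in this PR
print(records[0]["text"])  # -> class TinyCNN(nn.Module):
```

The LLaMA notebook loads the same file with `load_dataset("json", data_files="data_finetune/dataset.jsonl", split="train")` before tokenization.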
+### GPT-2 Results After fine-tuning, the model's weights are distributed between the client and server as follows: - Total weights removed from the server: 68.24% - LoRA weights kept on the client: 147,456 (approximately 0.12% of the original model's weights) -Note that the embedding are not considered for now but contain a significant amount of weights (around 30%) for GPT2. They will be considered in a future version of Concrete ML. +Note that the embeddings are not considered for now but contain a significant amount of weights (around 30%) for GPT2. They will be considered in a future version of Concrete ML. + +### LLaMA Results + +TBD ## Conclusion diff --git a/use_case_examples/lora_finetuning/data_finetune/dataset.jsonl b/use_case_examples/lora_finetuning/data_finetune/dataset.jsonl new file mode 100644 index 000000000..06363d611 --- /dev/null +++ b/use_case_examples/lora_finetuning/data_finetune/dataset.jsonl @@ -0,0 +1,46 @@ +{"text": "class TinyCNN(nn.Module):"} +{"text": "def __init__(self, n_classes) -> None:\n super().__init__()\n self.conv1 = nn.Conv2d(1, 8, 3, stride=1, padding=0)\n self.conv2 = nn.Conv2d(8, 16, 3, stride=2, padding=0)\n self.conv3 = nn.Conv2d(16, 32, 2, stride=1, padding=0)\n self.fc1 = nn.Linear(32, n_classes)"} +{"text": "def forward(self, x):\n x = self.conv1(x)\n x = torch.relu(x)\n x = self.conv2(x)\n x = torch.relu(x)\n x = self.conv3(x)\n x = torch.relu(x)\n x = x.flatten(1)\n x = self.fc1(x)\n return x\nnet = TinyCNN(10)\n#... (Training loop)...\nq_module = compile_torch_model(net, x_train, rounding_threshold_bits=6, p_error=0.1)\n# Key generation\nq_module.fhe_circuit.keygen()\n# Inference"} +{"text": "self.fc1(x)\n return x\nnet = TinyCNN(10)\n#... (Training loop)...\nq_module = compile_torch_model(net, x_train, rounding_threshold_bits=6, p_error=0.1)\n# Key generation\nq_module.fhe_circuit.keygen()\n# Inference in FHE\ny_pred_fhe = q_module.forward(x_test, fhe=\"execute\")\n\n**4. Quantization-Aware Training:**\npython\nfrom torch import nn\nfrom concrete.ml.torch.compile import compile_brevitas_qat_model\nimport brevitas.nn as qnn\nfrom brev"} +{"text": "in FHE\ny_pred_fhe = q_module.forward(x_test, fhe=\"execute\")\n\n**4. Quantization-Aware Training:**\npython\nfrom torch import nn\nfrom concrete.ml.torch.compile import compile_brevitas_qat_model\nimport brevitas.nn as qnn\nfrom brevitas.core.bit_width import BitWidthImplType\nfrom brevitas.core.quant import QuantType\nfrom brevitas.core.restrict_val import FloatToIntImplType, RestrictValueType\nfrom brevitas.core.scaling import ScalingImplType\nfrom brevitas.core.zero_point import ZeroZeroPoint\nfrom bre"} +{"text": "itas.core.bit_width import BitWidthImplType\nfrom brevitas.core.quant import QuantType\nfrom brevitas.core.restrict_val import FloatToIntImplType, RestrictValueType\nfrom brevitas.core.scaling import ScalingImplType\nfrom brevitas.core.zero_point import ZeroZeroPoint\nfrom brevitas.inject import ExtendedInjector\nfrom brevitas.quant.solver import ActQuantSolver, WeightQuantSolver\nfrom dependencies import value\nfrom torch.nn.utils import prune\n#... 
(Data loading and preprocessing)..."} +{"text": "class CommonQuant(ExtendedInjector):\n bit_width_impl_type = BitWidthImplType.CONST\n scaling_impl_type = ScalingImplType.CONST\n restrict_scaling_type = RestrictValueType.FP\n zero_point_impl = ZeroZeroPoint\n float_to_int_impl_type = FloatToIntImplType.ROUND\n scaling_per_output_channel = False\n narrow_range = True\n signed = True\n @value"} +{"text": "def quant_type(bit_width): # pylint: disable=no-self-argument\n if bit_width is None:\n return QuantType.FP\n if bit_width == 1:\n return QuantType.BINARY\n return QuantType.INT"} +{"text": "class CommonWeightQuant(CommonQuant, WeightQuantSolver): # pylint: disable=too-many-ancestors\n scaling_const = 1.0\n signed = True"} +{"text": "class CommonActQuant(CommonQuant, ActQuantSolver): # pylint: disable=too-many-ancestors\n min_val = -1.0\n max_val = 1.0"} +{"text": "class QATPrunedSimpleNet(nn.Module):"} +{"text": "def __init__(self, n_hidden, qlinear_args, qidentity_args):\n super().__init__()\n self.pruned_layers = set()\n self.quant_inp = qnn.QuantIdentity(**qidentity_args)\n self.fc1 = qnn.QuantLinear(IN_FEAT, n_hidden, **qlinear_args)\n self.relu1 = qnn.QuantReLU(bit_width=qidentity_args[\"bit_width\"])\n self.fc2 = qnn.QuantLinear(n_hidden, n_hidden, **qlinear_args)\n self.relu2 = qnn.QuantReLU(bit_width=qidentity_args[\"bit_width"} +{"text": ", **qlinear_args)\n self.relu1 = qnn.QuantReLU(bit_width=qidentity_args[\"bit_width\"])\n self.fc2 = qnn.QuantLinear(n_hidden, n_hidden, **qlinear_args)\n self.relu2 = qnn.QuantReLU(bit_width=qidentity_args[\"bit_width\"])\n self.fc3 = qnn.QuantLinear(n_hidden, OUT_FEAT, **qlinear_args)\n for m in self.modules():\n if isinstance(m, qnn.QuantLinear):\n torch.nn.init.uniform_(m.weight.data, -1, 1)"} +{"text": "def forward(self, x):\n x = self.quant_inp(x)\n x = self.relu1(self.fc1(x))\n x = self.relu2(self.fc2(x))\n x = self.fc3(x)\n return x"} +{"text": "def prune(self, max_non_zero):\n # Linear layer weight has dimensions NumOutputs x NumInputs\n for name, layer in self.named_modules():\n if isinstance(layer, qnn.QuantLinear):\n num_zero_weights = (layer.weight.shape[1] - max_non_zero) * layer.weight.shape[0]\n if num_zero_weights <= 0:\n continue\n print(f\"Pruning layer {name} factor {num_zero_weights}\")\n prune.l1_unstructured(layer, \"weight\", amount=num_zero_weights)\n self.pruned_layers.add(name)"} +{"text": "def unprune(self):\n for name, layer in self.named_modules():\n if name in self.pruned_layers:\n prune.remove(layer, \"weight\")\n self.pruned_layers.remove(name)\ntorch_model = QATPrunedSimpleNet(\n n_hidden=n_hidden,\n qlinear_args={\n \"weight_bit_width\": 3,\n \"weight_quant\": CommonWeightQuant,\n \"bias\": True,\n \"bias_quant\": None,\n \"narrow_range\": True,\n },\n qidentity_args={\"bit_width\": 3, \"act_quant\": CommonActQuant},\n)\ntorch"} +{"text": "_args={\n \"weight_bit_width\": 3,\n \"weight_quant\": CommonWeightQuant,\n \"bias\": True,\n \"bias_quant\": None,\n \"narrow_range\": True,\n },\n qidentity_args={\"bit_width\": 3, \"act_quant\": CommonActQuant},\n)\ntorch_model.prune(20)\n#... (Training loop)...\nquantized_numpy_module = compile_brevitas_qat_model(torch_model, x_train)\n# Inference in FHE (simulation)\ny_pred_fhe = quantized_numpy_module.forward(x_test, fhe=\"simulate\")\n\n**5. Client/Server"} +{"text": "_model.prune(20)\n#... 
(Training loop)...\nquantized_numpy_module = compile_brevitas_qat_model(torch_model, x_train)\n# Inference in FHE (simulation)\ny_pred_fhe = quantized_numpy_module.forward(x_test, fhe=\"simulate\")\n\n**5. Client/Server Deployment (LogisticRegressionTraining.ipynb):**\npython\nfrom pathlib import Path\nfrom tempfile import TemporaryDirectory\nimport numpy as np\nfrom concrete.ml.deployment import FHEModelClient, FHEModelDev, FHEModelServer\nfrom concrete.ml.sklearn import SGDClassifier\nfrom concrete import fhe"} +{"text": "Deployment (LogisticRegressionTraining.ipynb):**\npython\nfrom pathlib import Path\nfrom tempfile import TemporaryDirectory\nimport numpy as np\nfrom concrete.ml.deployment import FHEModelClient, FHEModelDev, FHEModelServer\nfrom concrete.ml.sklearn import SGDClassifier\nfrom concrete import fhe\n#... (Data loading, preprocessing, and model training)...\n# Assuming you have a trained model: sgd_clf_binary_fhe\n# and x_compile_set, y_compile_set for compilation\n# Define the directory where to save the deployment files\nDEPLOYMENT_PATH = Path(\"fhe_training\")"} +{"text": "#... (Data loading, preprocessing, and model training)...\n# Assuming you have a trained model: sgd_clf_binary_fhe\n# and x_compile_set, y_compile_set for compilation\n# Define the directory where to save the deployment files\nDEPLOYMENT_PATH = Path(\"fhe_training\")\nDEPLOYMENT_PATH.mkdir(exist_ok=True)\ndeployment_dir = TemporaryDirectory(dir=str(DEPLOYMENT_PATH))\ndeployment_path = Path(deployment_dir.name)\n# Save the model for deployment\nfhe_dev = FHEModelDev(deployment_path, sgd_clf_binary_fhe)\nfhe_dev.save(mode=\""} +{"text": "DEPLOYMENT_PATH.mkdir(exist_ok=True)\ndeployment_dir = TemporaryDirectory(dir=str(DEPLOYMENT_PATH))\ndeployment_path = Path(deployment_dir.name)\n# Save the model for deployment\nfhe_dev = FHEModelDev(deployment_path, sgd_clf_binary_fhe)\nfhe_dev.save(mode=\"training\")\n# Client-side setup\nfhe_client = FHEModelClient(deployment_path)\nfhe_client.load()\nserialized_evaluation_keys = fhe_client.get_serialized_evaluation_keys()\n# Server-side setup\nfhe_server = FHEModelServer(deployment_path)\nfhe_server.load()\n# Example of encryption,"} +{"text": "training\")\n# Client-side setup\nfhe_client = FHEModelClient(deployment_path)\nfhe_client.load()\nserialized_evaluation_keys = fhe_client.get_serialized_evaluation_keys()\n# Server-side setup\nfhe_server = FHEModelServer(deployment_path)\nfhe_server.load()\n# Example of encryption, server-side processing, and decryption\nbatch_size = sgd_clf_binary_fhe.batch_size\nweights = np.random.rand(1, x_train.shape[1], 1)\nbias = np.random.rand(1, 1, 1)"} +{"text": "def quantize_encrypt_serialize_batches(fhe_client, x, y, weights, bias, batch_size):\n #... (Implementation as before)..."} +{"text": "def server_run(fhe_server, x_batches_enc, y_batches_enc, weights_enc, bias_enc, evaluation_keys):\n #... (Implementation as before)..."} +{"text": "def train_fhe_client_server(\n #... (Parameters as before)...\n):\n #... 
(Training loop)\n # Quantize, encrypt and serialize the batched inputs as well as the weight and bias values\n x_batches_enc, y_batches_enc, weights_enc, bias_enc = quantize_encrypt_serialize_batches(\n fhe_client, x, y, weights, bias, batch_size\n )\n # Iterate the circuit over the batches on the server\n fitted_weights_enc, fitted_bias_enc = server_run(\n fhe_server,\n x_batches_enc,\n y_batches_enc,\n weights_enc,"} +{"text": "_serialize_batches(\n fhe_client, x, y, weights, bias, batch_size\n )\n # Iterate the circuit over the batches on the server\n fitted_weights_enc, fitted_bias_enc = server_run(\n fhe_server,\n x_batches_enc,\n y_batches_enc,\n weights_enc,\n bias_enc,\n serialized_evaluation_keys,\n )\n # Back on the client, deserialize, decrypt and de-quantize the fitted weight and bias values\n weights, bias = fhe_client.deserialize_decrypt_dequantize(\n fitted_weights_enc, fitted_bias_enc\n )\n return weights, bias,"} +{"text": "bias_enc,\n serialized_evaluation_keys,\n )\n # Back on the client, deserialize, decrypt and de-quantize the fitted weight and bias values\n weights, bias = fhe_client.deserialize_decrypt_dequantize(\n fitted_weights_enc, fitted_bias_enc\n )\n return weights, bias, acc_history\n# Cleanup\ndeployment_dir.cleanup()\n\n**6. Hyper-parameter Tuning with GridSearchCV (XGBClassifier.ipynb, DecisionTreeRegressor.ipynb):**\npython\nfrom sklearn.model_selection import GridSearchCV\nfrom concrete.ml.sklearn import XGBClassifier as ConcreteXGBClassifier\nfrom"} +{"text": "acc_history\n# Cleanup\ndeployment_dir.cleanup()\n\n**6. Hyper-parameter Tuning with GridSearchCV (XGBClassifier.ipynb, DecisionTreeRegressor.ipynb):**\npython\nfrom sklearn.model_selection import GridSearchCV\nfrom concrete.ml.sklearn import XGBClassifier as ConcreteXGBClassifier\nfrom sklearn.metrics import make_scorer, matthews_corrcoef\n#... (Data loading and preprocessing)...\n# Create scorer with the MCC metric\ngrid_scorer = make_scorer(matthews_corrcoef, greater_is_better=True)\n# Define the parameter grid to search\nparam_grid = {"} +{"text": "sklearn.metrics import make_scorer, matthews_corrcoef\n#... (Data loading and preprocessing)...\n# Create scorer with the MCC metric\ngrid_scorer = make_scorer(matthews_corrcoef, greater_is_better=True)\n# Define the parameter grid to search\nparam_grid = {\n \"n_bits\": [5, 6],\n \"max_depth\": [2, 3],\n \"n_estimators\": [10, 20, 50],\n}\n# Instantiate GridSearchCV with the Concrete ML model\ngrid_search = GridSearchCV(\n ConcreteXGBClassifier(),\n param_grid"} +{"text": "\"n_bits\": [5, 6],\n \"max_depth\": [2, 3],\n \"n_estimators\": [10, 20, 50],\n}\n# Instantiate GridSearchCV with the Concrete ML model\ngrid_search = GridSearchCV(\n ConcreteXGBClassifier(),\n param_grid,\n cv=5,\n scoring=grid_scorer,\n error_score=\"raise\",\n verbose=1,\n)\n# Run the grid search\ngrid_search.fit(x_train, y_train)\n# Get the best parameters\nbest_params = grid_search.best_params_\n# Create a new model with the best parameters"} +{"text": ",\n cv=5,\n scoring=grid_scorer,\n error_score=\"raise\",\n verbose=1,\n)\n# Run the grid search\ngrid_search.fit(x_train, y_train)\n# Get the best parameters\nbest_params = grid_search.best_params_\n# Create a new model with the best parameters\nbest_model = ConcreteXGBClassifier(**best_params)\nbest_model.fit(x_train, y_train)\n# Compile and proceed with FHE inference as shown in other examples\n\n**7. 
GLM Models (GLMComparison.ipynb):**\n* **Poisson Regressor**\npython\nfrom concrete"} +{"text": "best_model = ConcreteXGBClassifier(**best_params)\nbest_model.fit(x_train, y_train)\n# Compile and proceed with FHE inference as shown in other examples\n\n**7. GLM Models (GLMComparison.ipynb):**\n* **Poisson Regressor**\npython\nfrom concrete.ml.sklearn import PoissonRegressor as ConcretePoissonRegressor\n#... (Data loading and preprocessing)...\nconcrete_pr = ConcretePoissonRegressor(n_bits=8)\nconcrete_pr.fit(x_train, y_train, sample_weight=train_weights)\ncircuit = concrete_pr.compile(x_train)\n# Key generation"} +{"text": ".ml.sklearn import PoissonRegressor as ConcretePoissonRegressor\n#... (Data loading and preprocessing)...\nconcrete_pr = ConcretePoissonRegressor(n_bits=8)\nconcrete_pr.fit(x_train, y_train, sample_weight=train_weights)\ncircuit = concrete_pr.compile(x_train)\n# Key generation\ncircuit.client.keygen(force=False)\n# Inference in FHE\ny_pred_fhe = concrete_pr.predict(x_test, fhe=\"execute\")\n\n* **Gamma Regressor**\npython\nfrom concrete.ml.sklearn import GammaRegressor as ConcreteGammaRegressor\n#... (Data loading and preprocessing)..."} +{"text": "circuit.client.keygen(force=False)\n# Inference in FHE\ny_pred_fhe = concrete_pr.predict(x_test, fhe=\"execute\")\n\n* **Gamma Regressor**\npython\nfrom concrete.ml.sklearn import GammaRegressor as ConcreteGammaRegressor\n#... (Data loading and preprocessing)...\nconcrete_gr = ConcreteGammaRegressor(n_bits=8)\nconcrete_gr.fit(x_train, y_train, sample_weight=train_weights)\ncircuit = concrete_gr.compile(x_train)\n# Key generation\ncircuit.client.keygen(force=False)\n# Inference in FHE\ny_pred_fhe = concrete_gr.predict(x"} +{"text": "concrete_gr = ConcreteGammaRegressor(n_bits=8)\nconcrete_gr.fit(x_train, y_train, sample_weight=train_weights)\ncircuit = concrete_gr.compile(x_train)\n# Key generation\ncircuit.client.keygen(force=False)\n# Inference in FHE\ny_pred_fhe = concrete_gr.predict(x_test, fhe=\"execute\")\n\n* **Tweedie Regressor**\npython\nfrom concrete.ml.sklearn import TweedieRegressor as ConcreteTweedieRegressor\n#... (Data loading and preprocessing)...\nconcrete_tr = ConcreteTweedieRegressor(n_bits=8, power=1.9"} +{"text": "_test, fhe=\"execute\")\n\n* **Tweedie Regressor**\npython\nfrom concrete.ml.sklearn import TweedieRegressor as ConcreteTweedieRegressor\n#... (Data loading and preprocessing)...\nconcrete_tr = ConcreteTweedieRegressor(n_bits=8, power=1.9)\nconcrete_tr.fit(x_train, y_train, sample_weight=train_weights)\ncircuit = concrete_tr.compile(x_train)\n# Key generation\ncircuit.client.keygen(force=False)\n# Inference in FHE\ny_pred_fhe = concrete_tr.predict(x_test, fhe=\"execute\")\n\n**8. Fine"} +{"text": ")\nconcrete_tr.fit(x_train, y_train, sample_weight=train_weights)\ncircuit = concrete_tr.compile(x_train)\n# Key generation\ncircuit.client.keygen(force=False)\n# Inference in FHE\ny_pred_fhe = concrete_tr.predict(x_test, fhe=\"execute\")\n\n**8. Fine-tuning with LoRA (LoraMLP.ipynb):**\npython\nimport torch\nfrom peft import LoraConfig, get_peft_model\nfrom torch import nn, optim\nfrom concrete.ml.torch.lora import LoraTrainer\n#... (Data loading and preprocessing)...\n# Define"} +{"text": "-tuning with LoRA (LoraMLP.ipynb):**\npython\nimport torch\nfrom peft import LoraConfig, get_peft_model\nfrom torch import nn, optim\nfrom concrete.ml.torch.lora import LoraTrainer\n#... 
(Data loading and preprocessing)...\n# Define an MLP model without LoRA layers"} +{"text": "class SimpleMLP(nn.Module):"} +{"text": "def __init__(self, input_size=2, hidden_size=128, num_classes=2):\n super().__init__()\n self.fc1 = nn.Linear(input_size, hidden_size)\n self.relu = nn.ReLU()\n self.fc2 = nn.Linear(hidden_size, num_classes)"} +{"text": "def forward(self, x):\n out = self.fc1(x)\n out = self.relu(out)\n out = self.fc2(out)\n return out\n# Instantiate the model\nmodel = SimpleMLP()\n#... (Training loop for Task 1)...\n# Apply LoRA to the model using peft\nlora_config = LoraConfig(\n r=1, lora_alpha=1, lora_dropout=0.01, target_modules=[\"fc1\", \"fc2\"], bias=\"none\"\n)\npeft_model = get_peft_model(model, lora_config)\n# Update training parameters"} +{"text": "using peft\nlora_config = LoraConfig(\n r=1, lora_alpha=1, lora_dropout=0.01, target_modules=[\"fc1\", \"fc2\"], bias=\"none\"\n)\npeft_model = get_peft_model(model, lora_config)\n# Update training parameters, including loss function\noptimizer = optim.Adam(filter(lambda p: p.requires_grad, peft_model.parameters()), lr=0.01)\nloss_fn = nn.CrossEntropyLoss()\ntraining_args = {\"gradient_accumulation_steps\": 1}\n# Set up LoRA training\nlora_trainer = LoraTrainer"} +{"text": ", including loss function\noptimizer = optim.Adam(filter(lambda p: p.requires_grad, peft_model.parameters()), lr=0.01)\nloss_fn = nn.CrossEntropyLoss()\ntraining_args = {\"gradient_accumulation_steps\": 1}\n# Set up LoRA training\nlora_trainer = LoraTrainer(peft_model, optimizer=optimizer, loss_fn=loss_fn, training_args=training_args)\n# Prepare input data for calibration\nbatch_size_per_task = batch_size // 2\ninputset = (\n torch.cat([X_task1[:batch_size_per_task], X_task2[:batch_size_per_task]]"} +{"text": "(peft_model, optimizer=optimizer, loss_fn=loss_fn, training_args=training_args)\n# Prepare input data for calibration\nbatch_size_per_task = batch_size // 2\ninputset = (\n torch.cat([X_task1[:batch_size_per_task], X_task2[:batch_size_per_task]]),\n torch.cat([y_task1[:batch_size_per_task], y_task2[:batch_size_per_task]]),\n)\n# Compile the model\nlora_trainer.compile(inputset, n_bits=8)\n# Fine-tune the model on Task 2 using LoRA\nlora_trainer.train(train_loader"} +{"text": "),\n torch.cat([y_task1[:batch_size_per_task], y_task2[:batch_size_per_task]]),\n)\n# Compile the model\nlora_trainer.compile(inputset, n_bits=8)\n# Fine-tune the model on Task 2 using LoRA\nlora_trainer.train(train_loader_task2, num_epochs=10, fhe=\"execute\")\n# Enable/Disable LoRA adapters\npeft_model.enable_adapter_layers()\npeft_model.disable_adapter_layers()\n# Print trainable (lora) parameters\npeft_model.print_trainable_parameters()\n# Save the model and remove all layers that will be done"} +{"text": "_task2, num_epochs=10, fhe=\"execute\")\n# Enable/Disable LoRA adapters\npeft_model.enable_adapter_layers()\npeft_model.disable_adapter_layers()\n# Print trainable (lora) parameters\npeft_model.print_trainable_parameters()\n# Save the model and remove all layers that will be done on the server\npath = Path(\"lora_mlp\")\nif path.is_dir() and any(path.iterdir()):\n shutil.rmtree(path)\nlora_trainer.save_and_clear_private_info(path)"} diff --git a/use_case_examples/lora_finetuning/data_finetune/raw_cml_1.7.0_examples.txt b/use_case_examples/lora_finetuning/data_finetune/raw_cml_1.7.0_examples.txt new file mode 100644 index 000000000..6adba5a62 --- /dev/null +++ b/use_case_examples/lora_finetuning/data_finetune/raw_cml_1.7.0_examples.txt @@ -0,0 +1,458 @@ +**1. 
Linear Models:** +* **Logistic Regression:** +python +from concrete.ml.sklearn import LogisticRegression as ConcreteLogisticRegression +# ... (Data loading and preprocessing) ... +concrete_logr = ConcreteLogisticRegression(n_bits=8) +concrete_logr.fit(x_train, y_train) +fhe_circuit = concrete_logr.compile(x_train) +# Key generation +fhe_circuit.client.keygen(force=False) +# Inference in FHE +y_pred_fhe = concrete_logr.predict(x_test, fhe="execute") + +* **Linear Regression:** +python +from concrete.ml.sklearn import LinearRegression as ConcreteLinearRegression +# ... (Data loading and preprocessing) ... +concrete_lr = ConcreteLinearRegression(n_bits=8) +concrete_lr.fit(x_train, y_train) +fhe_circuit = concrete_lr.compile(x_train) +# Key generation +fhe_circuit.client.keygen(force=False) +# Inference in FHE +y_pred_fhe = concrete_lr.predict(x_test, fhe="execute") + +* **Linear SVR:** +python +from concrete.ml.sklearn.svm import LinearSVR as ConcreteLinearSVR +# ... (Data loading and preprocessing) ... +concrete_svr = ConcreteLinearSVR(n_bits=8, C=0.5) +concrete_svr.fit(x_train, y_train) +circuit = concrete_svr.compile(x_train) +# Key generation +circuit.client.keygen(force=False) +# Inference in FHE +y_pred_fhe = concrete_svr.predict(x_test, fhe="execute") + +* **Linear SVC** +python +from concrete.ml.sklearn.svm import LinearSVC as ConcreteLinearSVC +# ... (Data loading and preprocessing) ... +concrete_svc = ConcreteLinearSVC(n_bits=8, C=0.025) +concrete_svc.fit(x_train, y_train) +circuit = concrete_svc.compile(x_train) +# Inference in FHE +y_pred_fhe = concrete_svc.predict(x_test, fhe="execute") + +**2. Tree-Based Models:** +* **XGBoost Classifier:** +python +from concrete.ml.sklearn import XGBClassifier as ConcreteXGBClassifier +# ... (Data loading and preprocessing) ... +concrete_xgb = ConcreteXGBClassifier(n_bits=6, n_estimators=50, max_depth=4) +concrete_xgb.fit(x_train, y_train) +circuit = concrete_xgb.compile(x_train) +# Key generation +circuit.client.keygen(force=False) +# Inference in FHE +y_preds_fhe = concrete_xgb.predict(x_test, fhe="execute") + +* **XGBoost Regressor:** +python +from concrete.ml.sklearn import XGBRegressor as ConcreteXGBRegressor +# ... (Data loading and preprocessing) ... +concrete_xgb = ConcreteXGBRegressor(n_bits=6, n_estimators=50, max_depth=4) +concrete_xgb.fit(x_train, y_train) +circuit = concrete_xgb.compile(x_train) +# Key generation +circuit.client.keygen(force=False) +# Inference in FHE +y_preds_fhe = concrete_xgb.predict(x_test, fhe="execute") + +* **Decision Tree Classifier:** +python +from concrete.ml.sklearn import DecisionTreeClassifier as ConcreteDecisionTreeClassifier +# ... (Data loading and preprocessing) ... +model = ConcreteDecisionTreeClassifier( + max_features="log2", + min_samples_leaf=1, + min_samples_split=2, + max_depth=6, + n_bits=6, +) +model.fit(x_train, y_train) +circuit = model.compile(x_train) +# Key generation +circuit.client.keygen(force=False) +# Inference in FHE +y_pred_fhe = model.predict(x_test, fhe="execute") + +* **Decision Tree Regressor:** +python +from concrete.ml.sklearn import DecisionTreeRegressor as ConcreteDecisionTreeRegressor +# ... (Data loading and preprocessing) ... 
+model = ConcreteDecisionTreeRegressor( + max_depth=10, + max_features=5, + min_samples_leaf=2, + min_samples_split=10, + n_bits=6, + random_state=42, +) +model.fit(x_train, y_train) +circuit = model.compile(x_train) +# Key generation +circuit.client.keygen(force=False) +# Inference in FHE +y_pred_fhe = model.predict(x_test, fhe="execute") + +* **Random Forest Classifier:** +python +from concrete.ml.sklearn import RandomForestClassifier +# ... (Data loading and preprocessing) ... +model = RandomForestClassifier(max_depth=4, n_estimators=5, n_bits=5) +model.fit(x_train, y_train) +circuit = model.compile(x_train) +# Key generation +circuit.client.keygen(force=False) +# Inference in FHE +y_pred_fhe = model.predict(x_test, fhe="execute") + +* **Random Forest Regressor:** +python +from concrete.ml.sklearn import RandomForestRegressor +# ... (Data loading and preprocessing) ... +model = RandomForestRegressor(n_bits=5, n_estimators=50, max_depth=4) +model.fit(x_train, y_train) +circuit = model.compile(x_train) +# Key generation +circuit.client.keygen(force=False) +# Inference in FHE +y_pred_fhe = model.predict(x_test, fhe="execute") + +**3. Neural Networks:** +* **Fully Connected Neural Network:** +python +from torch import nn +from concrete.ml.sklearn import NeuralNetClassifier +# ... (Data loading and preprocessing) ... +parameters_neural_net = { + "module__n_w_bits": 2, + "module__n_a_bits": 4, + "module__n_accum_bits": 32, + "module__n_hidden_neurons_multiplier": 6, + "module__n_layers": 2, # 1 hidden layer + "module__activation_function": nn.ReLU, + "max_epochs": 400, + "verbose": 0, + "lr": 0.001, +} +model = NeuralNetClassifier(batch_size=32, **parameters_neural_net) +model.fit(X=x_train, y=y_train) +fhe_circuit = model.compile(x_train) +# Key generation +fhe_circuit.client.keygen(force=False) +# Inference in FHE +y_pred_fhe = model.predict(x_test, fhe="execute") + +* **Convolutional Neural Network:** +python +import torch +from torch import nn +from concrete.ml.torch.compile import compile_torch_model +# ... (Data loading and preprocessing) ... +class TinyCNN(nn.Module): + def __init__(self, n_classes) -> None: + super().__init__() + self.conv1 = nn.Conv2d(1, 8, 3, stride=1, padding=0) + self.conv2 = nn.Conv2d(8, 16, 3, stride=2, padding=0) + self.conv3 = nn.Conv2d(16, 32, 2, stride=1, padding=0) + self.fc1 = nn.Linear(32, n_classes) + def forward(self, x): + x = self.conv1(x) + x = torch.relu(x) + x = self.conv2(x) + x = torch.relu(x) + x = self.conv3(x) + x = torch.relu(x) + x = x.flatten(1) + x = self.fc1(x) + return x +net = TinyCNN(10) +# ... (Training loop) ... +q_module = compile_torch_model(net, x_train, rounding_threshold_bits=6, p_error=0.1) +# Key generation +q_module.fhe_circuit.keygen() +# Inference in FHE +y_pred_fhe = q_module.forward(x_test, fhe="execute") + +**4. Quantization-Aware Training:** +python +from torch import nn +from concrete.ml.torch.compile import compile_brevitas_qat_model +import brevitas.nn as qnn +from brevitas.core.bit_width import BitWidthImplType +from brevitas.core.quant import QuantType +from brevitas.core.restrict_val import FloatToIntImplType, RestrictValueType +from brevitas.core.scaling import ScalingImplType +from brevitas.core.zero_point import ZeroZeroPoint +from brevitas.inject import ExtendedInjector +from brevitas.quant.solver import ActQuantSolver, WeightQuantSolver +from dependencies import value +from torch.nn.utils import prune +# ... (Data loading and preprocessing) ... 
+class CommonQuant(ExtendedInjector): + bit_width_impl_type = BitWidthImplType.CONST + scaling_impl_type = ScalingImplType.CONST + restrict_scaling_type = RestrictValueType.FP + zero_point_impl = ZeroZeroPoint + float_to_int_impl_type = FloatToIntImplType.ROUND + scaling_per_output_channel = False + narrow_range = True + signed = True + @value + def quant_type(bit_width): # pylint: disable=no-self-argument + if bit_width is None: + return QuantType.FP + if bit_width == 1: + return QuantType.BINARY + return QuantType.INT +class CommonWeightQuant(CommonQuant, WeightQuantSolver): # pylint: disable=too-many-ancestors + scaling_const = 1.0 + signed = True +class CommonActQuant(CommonQuant, ActQuantSolver): # pylint: disable=too-many-ancestors + min_val = -1.0 + max_val = 1.0 +class QATPrunedSimpleNet(nn.Module): + def __init__(self, n_hidden, qlinear_args, qidentity_args): + super().__init__() + self.pruned_layers = set() + self.quant_inp = qnn.QuantIdentity(**qidentity_args) + self.fc1 = qnn.QuantLinear(IN_FEAT, n_hidden, **qlinear_args) + self.relu1 = qnn.QuantReLU(bit_width=qidentity_args["bit_width"]) + self.fc2 = qnn.QuantLinear(n_hidden, n_hidden, **qlinear_args) + self.relu2 = qnn.QuantReLU(bit_width=qidentity_args["bit_width"]) + self.fc3 = qnn.QuantLinear(n_hidden, OUT_FEAT, **qlinear_args) + for m in self.modules(): + if isinstance(m, qnn.QuantLinear): + torch.nn.init.uniform_(m.weight.data, -1, 1) + def forward(self, x): + x = self.quant_inp(x) + x = self.relu1(self.fc1(x)) + x = self.relu2(self.fc2(x)) + x = self.fc3(x) + return x + def prune(self, max_non_zero): + # Linear layer weight has dimensions NumOutputs x NumInputs + for name, layer in self.named_modules(): + if isinstance(layer, qnn.QuantLinear): + num_zero_weights = (layer.weight.shape[1] - max_non_zero) * layer.weight.shape[0] + if num_zero_weights <= 0: + continue + print(f"Pruning layer {name} factor {num_zero_weights}") + prune.l1_unstructured(layer, "weight", amount=num_zero_weights) + self.pruned_layers.add(name) + def unprune(self): + for name, layer in self.named_modules(): + if name in self.pruned_layers: + prune.remove(layer, "weight") + self.pruned_layers.remove(name) +torch_model = QATPrunedSimpleNet( + n_hidden=n_hidden, + qlinear_args={ + "weight_bit_width": 3, + "weight_quant": CommonWeightQuant, + "bias": True, + "bias_quant": None, + "narrow_range": True, + }, + qidentity_args={"bit_width": 3, "act_quant": CommonActQuant}, +) +torch_model.prune(20) +# ... (Training loop) ... +quantized_numpy_module = compile_brevitas_qat_model(torch_model, x_train) +# Inference in FHE (simulation) +y_pred_fhe = quantized_numpy_module.forward(x_test, fhe="simulate") + +**5. Client/Server Deployment (LogisticRegressionTraining.ipynb):** +python +from pathlib import Path +from tempfile import TemporaryDirectory +import numpy as np +from concrete.ml.deployment import FHEModelClient, FHEModelDev, FHEModelServer +from concrete.ml.sklearn import SGDClassifier +from concrete import fhe +# ... (Data loading, preprocessing, and model training) ... 
+# Assuming you have a trained model: sgd_clf_binary_fhe +# and x_compile_set, y_compile_set for compilation +# Define the directory where to save the deployment files +DEPLOYMENT_PATH = Path("fhe_training") +DEPLOYMENT_PATH.mkdir(exist_ok=True) +deployment_dir = TemporaryDirectory(dir=str(DEPLOYMENT_PATH)) +deployment_path = Path(deployment_dir.name) +# Save the model for deployment +fhe_dev = FHEModelDev(deployment_path, sgd_clf_binary_fhe) +fhe_dev.save(mode="training") +# Client-side setup +fhe_client = FHEModelClient(deployment_path) +fhe_client.load() +serialized_evaluation_keys = fhe_client.get_serialized_evaluation_keys() +# Server-side setup +fhe_server = FHEModelServer(deployment_path) +fhe_server.load() +# Example of encryption, server-side processing, and decryption +batch_size = sgd_clf_binary_fhe.batch_size +weights = np.random.rand(1, x_train.shape[1], 1) +bias = np.random.rand(1, 1, 1) +def quantize_encrypt_serialize_batches(fhe_client, x, y, weights, bias, batch_size): + # ... (Implementation as before) ... +def server_run(fhe_server, x_batches_enc, y_batches_enc, weights_enc, bias_enc, evaluation_keys): + # ... (Implementation as before) ... +def train_fhe_client_server( + # ... (Parameters as before) ... +): + # ... (Training loop) + # Quantize, encrypt and serialize the batched inputs as well as the weight and bias values + x_batches_enc, y_batches_enc, weights_enc, bias_enc = quantize_encrypt_serialize_batches( + fhe_client, x, y, weights, bias, batch_size + ) + # Iterate the circuit over the batches on the server + fitted_weights_enc, fitted_bias_enc = server_run( + fhe_server, + x_batches_enc, + y_batches_enc, + weights_enc, + bias_enc, + serialized_evaluation_keys, + ) + # Back on the client, deserialize, decrypt and de-quantize the fitted weight and bias values + weights, bias = fhe_client.deserialize_decrypt_dequantize( + fitted_weights_enc, fitted_bias_enc + ) + return weights, bias, acc_history +# Cleanup +deployment_dir.cleanup() + +**6. Hyper-parameter Tuning with GridSearchCV (XGBClassifier.ipynb, DecisionTreeRegressor.ipynb):** +python +from sklearn.model_selection import GridSearchCV +from concrete.ml.sklearn import XGBClassifier as ConcreteXGBClassifier +from sklearn.metrics import make_scorer, matthews_corrcoef +# ... (Data loading and preprocessing) ... +# Create scorer with the MCC metric +grid_scorer = make_scorer(matthews_corrcoef, greater_is_better=True) +# Define the parameter grid to search +param_grid = { + "n_bits": [5, 6], + "max_depth": [2, 3], + "n_estimators": [10, 20, 50], +} +# Instantiate GridSearchCV with the Concrete ML model +grid_search = GridSearchCV( + ConcreteXGBClassifier(), + param_grid, + cv=5, + scoring=grid_scorer, + error_score="raise", + verbose=1, +) +# Run the grid search +grid_search.fit(x_train, y_train) +# Get the best parameters +best_params = grid_search.best_params_ +# Create a new model with the best parameters +best_model = ConcreteXGBClassifier(**best_params) +best_model.fit(x_train, y_train) +# Compile and proceed with FHE inference as shown in other examples + +**7. GLM Models (GLMComparison.ipynb):** +* **Poisson Regressor** +python +from concrete.ml.sklearn import PoissonRegressor as ConcretePoissonRegressor +# ... (Data loading and preprocessing) ... 
+concrete_pr = ConcretePoissonRegressor(n_bits=8) +concrete_pr.fit(x_train, y_train, sample_weight=train_weights) +circuit = concrete_pr.compile(x_train) +# Key generation +circuit.client.keygen(force=False) +# Inference in FHE +y_pred_fhe = concrete_pr.predict(x_test, fhe="execute") + +* **Gamma Regressor** +python +from concrete.ml.sklearn import GammaRegressor as ConcreteGammaRegressor +# ... (Data loading and preprocessing) ... +concrete_gr = ConcreteGammaRegressor(n_bits=8) +concrete_gr.fit(x_train, y_train, sample_weight=train_weights) +circuit = concrete_gr.compile(x_train) +# Key generation +circuit.client.keygen(force=False) +# Inference in FHE +y_pred_fhe = concrete_gr.predict(x_test, fhe="execute") + +* **Tweedie Regressor** +python +from concrete.ml.sklearn import TweedieRegressor as ConcreteTweedieRegressor +# ... (Data loading and preprocessing) ... +concrete_tr = ConcreteTweedieRegressor(n_bits=8, power=1.9) +concrete_tr.fit(x_train, y_train, sample_weight=train_weights) +circuit = concrete_tr.compile(x_train) +# Key generation +circuit.client.keygen(force=False) +# Inference in FHE +y_pred_fhe = concrete_tr.predict(x_test, fhe="execute") + +**8. Fine-tuning with LoRA (LoraMLP.ipynb):** +python +import torch +from peft import LoraConfig, get_peft_model +from torch import nn, optim +from concrete.ml.torch.lora import LoraTrainer +# ... (Data loading and preprocessing) ... +# Define an MLP model without LoRA layers +class SimpleMLP(nn.Module): + def __init__(self, input_size=2, hidden_size=128, num_classes=2): + super().__init__() + self.fc1 = nn.Linear(input_size, hidden_size) + self.relu = nn.ReLU() + self.fc2 = nn.Linear(hidden_size, num_classes) + def forward(self, x): + out = self.fc1(x) + out = self.relu(out) + out = self.fc2(out) + return out +# Instantiate the model +model = SimpleMLP() +# ... (Training loop for Task 1) ... +# Apply LoRA to the model using peft +lora_config = LoraConfig( + r=1, lora_alpha=1, lora_dropout=0.01, target_modules=["fc1", "fc2"], bias="none" +) +peft_model = get_peft_model(model, lora_config) +# Update training parameters, including loss function +optimizer = optim.Adam(filter(lambda p: p.requires_grad, peft_model.parameters()), lr=0.01) +loss_fn = nn.CrossEntropyLoss() +training_args = {"gradient_accumulation_steps": 1} +# Set up LoRA training +lora_trainer = LoraTrainer(peft_model, optimizer=optimizer, loss_fn=loss_fn, training_args=training_args) +# Prepare input data for calibration +batch_size_per_task = batch_size // 2 +inputset = ( + torch.cat([X_task1[:batch_size_per_task], X_task2[:batch_size_per_task]]), + torch.cat([y_task1[:batch_size_per_task], y_task2[:batch_size_per_task]]), +) +# Compile the model +lora_trainer.compile(inputset, n_bits=8) +# Fine-tune the model on Task 2 using LoRA +lora_trainer.train(train_loader_task2, num_epochs=10, fhe="execute") +# Enable/Disable LoRA adapters +peft_model.enable_adapter_layers() +peft_model.disable_adapter_layers() +# Print trainable (lora) parameters +peft_model.print_trainable_parameters() +# Save the model and remove all layers that will be done on the server +path = Path("lora_mlp") +if path.is_dir() and any(path.iterdir()): + shutil.rmtree(path) +lora_trainer.save_and_clear_private_info(path) diff --git a/use_case_examples/lora_finetuning/requirements.txt b/use_case_examples/lora_finetuning/requirements.txt index 7ea93063a..da6495fef 100644 --- a/use_case_examples/lora_finetuning/requirements.txt +++ b/use_case_examples/lora_finetuning/requirements.txt @@ -1,8 +1,9 @@ -e ../../. 
-transformers==4.41.2 -peft==0.11.1 +transformers==4.46.3 +peft==0.12.0 Jinja2==3.1.4 matplotlib==3.7.5 -datasets==3.0.1 -jupyter==1.0.0 -tqdm==4.66.5 \ No newline at end of file +datasets==3.1.0 +accelerate==1.2.0 +jupyter==1.1.1 +tqdm==4.67.1 \ No newline at end of file diff --git a/use_case_examples/lora_finetuning/scripts/create_dataset.py b/use_case_examples/lora_finetuning/scripts/create_dataset.py new file mode 100644 index 000000000..091f33e71 --- /dev/null +++ b/use_case_examples/lora_finetuning/scripts/create_dataset.py @@ -0,0 +1,109 @@ +import json +import re +from pathlib import Path + +from transformers import AutoTokenizer + + +def init_tokenizer(): + return AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B") + + +def chunk_text_by_tokens(text, tokenizer, max_tokens=128): + """Split text into chunks that don't exceed max_tokens with overlap.""" + overlap_tokens = max_tokens // 2 + tokens = tokenizer.encode(text) + chunks = [] + + # Start indices for each chunk + start_idx = 0 + + while start_idx < len(tokens): + # Calculate end index for current chunk + end_idx = min(start_idx + max_tokens, len(tokens)) + + # Get current chunk + current_chunk = tokens[start_idx:end_idx] + chunk_text = tokenizer.decode(current_chunk, skip_special_tokens=True) + + if chunk_text.strip(): + chunks.append(chunk_text) + + # Move start_idx forward by (max_tokens - overlap_tokens) + start_idx += max_tokens - overlap_tokens + + # If the remaining text is shorter than the overlap, we're done + if len(tokens) - start_idx < overlap_tokens: + break + + return chunks + + +def split_code_into_snippets(code): + # Split code into functions, classes, and other logical blocks + pattern = re.compile(r"^\s*(def |class )", re.MULTILINE) + indices = [match.start() for match in pattern.finditer(code)] + indices.append(len(code)) + snippets = [code[indices[i] : indices[i + 1]] for i in range(len(indices) - 1)] + return snippets + + +def process_code_file(code_file_path, tokenizer, max_tokens=128): + with open(code_file_path, "r", encoding="utf-8") as file: + code = file.read() + snippets = split_code_into_snippets(code) + # Further split snippets if they exceed token limit + tokenized_snippets = [] + for snippet in snippets: + tokenized_snippets.extend(chunk_text_by_tokens(snippet, tokenizer, max_tokens)) + return tokenized_snippets + + +def process_documentation_file(doc_file_path, tokenizer, max_tokens=128): + with open(doc_file_path, "r", encoding="utf-8") as file: + documentation = file.read() + snippets = documentation.split("\n\n") + # Further split snippets if they exceed token limit + tokenized_snippets = [] + for snippet in snippets: + tokenized_snippets.extend(chunk_text_by_tokens(snippet, tokenizer, max_tokens)) + return tokenized_snippets + + +def save_to_jsonl(snippets, output_file_path): + with open(output_file_path, "w", encoding="utf-8") as outfile: + for snippet in snippets: + snippet = snippet.strip() + if snippet: + json_line = json.dumps({"text": snippet}) + outfile.write(json_line + "\n") + + +def main(): + # Get the absolute path to the script's location + script_dir = Path(__file__).resolve().parent + + # Calculate paths relative to the script location + output_dir = script_dir.parent / "data_finetune" + + # Paths to your code and documentation files + code_file_path = output_dir / "raw_cml_1.7.0_examples.txt" + output_file_path = output_dir / "dataset.jsonl" + + # Initialize tokenizer + tokenizer = init_tokenizer() + max_tokens = 128 + + # Process code files with token control + 
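+    # process_code_file first splits the file into def/class snippets, then
+    # re-chunks any snippet longer than max_tokens via chunk_text_by_tokens,
+    # which steps forward by max_tokens - max_tokens // 2 = 64 tokens at a time,
+    # so consecutive chunks overlap by 50%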
code_snippets = process_code_file(code_file_path, tokenizer, max_tokens)
+
+    # Combine snippets
+    all_snippets = code_snippets
+
+    # Save to dataset.jsonl
+    save_to_jsonl(all_snippets, output_file_path)
+    print(f"Dataset saved to {output_file_path}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/use_case_examples/lora_finetuning/utils_lora.py b/use_case_examples/lora_finetuning/utils_lora.py
index 1cad80804..0ffd40d4c 100644
--- a/use_case_examples/lora_finetuning/utils_lora.py
+++ b/use_case_examples/lora_finetuning/utils_lora.py
@@ -6,6 +6,33 @@
 import numpy as np
 import torch
 import torch.backends.cudnn as cudnn
+from transformers.generation.stopping_criteria import (
+    StoppingCriteria,
+    StoppingCriteriaList,
+)
+
+
+class NewlineStopping(StoppingCriteria):
+    def __init__(self, tokenizer):
+        self.tokenizer = tokenizer
+        # Get all token IDs that represent newline characters
+        self.newline_tokens = set(
+            [
+                self.tokenizer.encode("\n")[0],
+                self.tokenizer.encode("\r")[0] if len(self.tokenizer.encode("\r")) > 0 else None,
+                (
+                    self.tokenizer.encode("\r\n")[0]
+                    if len(self.tokenizer.encode("\r\n")) > 0
+                    else None
+                ),
+            ]
+        )
+        self.newline_tokens.discard(None)
+
+    def __call__(self, input_ids, scores, **kwargs):
+        # Check if the last generated token is a newline
+        last_token = input_ids[0][-1].item()
+        return last_token in self.newline_tokens


 def generate_and_print(prompt, model, tokenizer, seed=None, max_new_tokens=30):
@@ -54,8 +81,11 @@ def generate_and_print(prompt, model, tokenizer, seed=None, max_new_tokens=30):
     if generated_text.startswith(prompt):
         generated_text = generated_text[len(prompt) :].strip()

-    # Print the user prompt and the generated text separated by a newline
-    print(f"{prompt}\n{generated_text}")
+    # Only keep text up to the first newline
+    generated_text = generated_text.split("\n")[0]
+
+    # Print the prompt and generated text on the same line
+    print(f"{prompt} {generated_text}")


 def print_weights_and_size(model, print_detail=False):
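
A usage note on the new `NewlineStopping` criterion: such criteria are consumed by `generate` through a `StoppingCriteriaList`, presumably inside `generate_and_print` in a hunk not shown here. A minimal sketch, assuming `model` and `tokenizer` are a loaded causal LM and its tokenizer, with an illustrative prompt:

```python
from transformers.generation.stopping_criteria import StoppingCriteriaList

# Stop generation at the first newline token the model produces
inputs = tokenizer("def forward(self, x):", return_tensors="pt")
output_ids = model.generate(
    **inputs,
    max_new_tokens=30,
    stopping_criteria=StoppingCriteriaList([NewlineStopping(tokenizer)]),
)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```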