chore: experiment with glwe dot product for MLPs
fix: use deai matmul

fix: refactor glwe executor in hybrid model

feat: use fhe execution in mlp lora example

fix: better notebook printing

chore: remove mlp lora example

fix: update to use cml extensions from pypi

feat: add glwe library, optimize compilation of hybrid model

fix: handle simulate/disable in hybrid model full linear

fix: pcc

fix: pcc

fix: bad assert

fix: test

fix: bad link

fix: pcc, tests, glwe extensions only on linux

chore: update licenses

fix: tests

fix: revert gpt2

fix: precomputed qat handled properly

fix: codeblock test

fix: readme link

fix: deepcopy

fix: refactoring

fix: comments
RomanBredehoft authored and andrei-stoian-zama committed Oct 17, 2024
1 parent 9c75c11 commit ba292fb
Showing 25 changed files with 1,346 additions and 670 deletions.
9 changes: 5 additions & 4 deletions deps_licenses/licenses_linux_user.txt
@@ -4,15 +4,16 @@ MarkupSafe, 2.1.5, BSD License
PyYAML, 6.0.2, MIT License
brevitas, 0.10.2, UNKNOWN
certifi, 2024.8.30, Mozilla Public License 2.0 (MPL 2.0)
charset-normalizer, 3.3.2, MIT License
charset-normalizer, 3.4.0, MIT License
coloredlogs, 15.0.1, MIT License
concrete-ml-extensions, 0.1.2, BSD-3-Clause-Clear
concrete-python, 2.8.1, BSD-3-Clause
dependencies, 2.0.1, BSD License
dill, 0.3.8, BSD License
dill, 0.3.9, BSD License
filelock, 3.16.1, The Unlicense (Unlicense)
flatbuffers, 2.0.7, Apache Software License
fsspec, 2024.9.0, BSD License
huggingface-hub, 0.25.1, Apache Software License
huggingface-hub, 0.25.2, Apache Software License
humanfriendly, 10.0, MIT License
hummingbird-ml, 0.4.11, MIT License
idna, 3.10, BSD License
Expand All @@ -32,7 +33,7 @@ nvidia-curand-cu12, 10.3.2.106, Other/Proprietary License
nvidia-cusolver-cu12, 11.4.5.107, Other/Proprietary License
nvidia-cusparse-cu12, 12.1.0.106, Other/Proprietary License
nvidia-nccl-cu12, 2.20.5, Other/Proprietary License
nvidia-nvjitlink-cu12, 12.6.68, Other/Proprietary License
nvidia-nvjitlink-cu12, 12.6.77, Other/Proprietary License
nvidia-nvtx-cu12, 12.1.105, Other/Proprietary License
onnx, 1.16.1, Apache License v2.0
onnxconverter-common, 1.13.0, MIT License
2 changes: 1 addition & 1 deletion deps_licenses/licenses_linux_user.txt.md5
@@ -1 +1 @@
ac76836858506534a0dc01cae9341f7d
8ea8aec4f5aac03565c2dcb9f3f8a1da
6 changes: 3 additions & 3 deletions deps_licenses/licenses_mac_intel_user.txt
@@ -4,15 +4,15 @@ MarkupSafe, 2.1.5, BSD License
PyYAML, 6.0.2, MIT License
brevitas, 0.10.2, UNKNOWN
certifi, 2024.8.30, Mozilla Public License 2.0 (MPL 2.0)
charset-normalizer, 3.3.2, MIT License
charset-normalizer, 3.4.0, MIT License
coloredlogs, 15.0.1, MIT License
concrete-python, 2.8.1, BSD-3-Clause
dependencies, 2.0.1, BSD License
dill, 0.3.8, BSD License
dill, 0.3.9, BSD License
filelock, 3.16.1, The Unlicense (Unlicense)
flatbuffers, 2.0.7, Apache Software License
fsspec, 2024.9.0, BSD License
huggingface-hub, 0.25.1, Apache Software License
huggingface-hub, 0.25.2, Apache Software License
humanfriendly, 10.0, MIT License
hummingbird-ml, 0.4.11, MIT License
idna, 3.10, BSD License
2 changes: 1 addition & 1 deletion deps_licenses/licenses_mac_intel_user.txt.md5
@@ -1 +1 @@
ac76836858506534a0dc01cae9341f7d
8ea8aec4f5aac03565c2dcb9f3f8a1da
6 changes: 3 additions & 3 deletions deps_licenses/licenses_mac_silicon_user.txt
@@ -4,15 +4,15 @@ MarkupSafe, 2.1.5, BSD License
PyYAML, 6.0.2, MIT License
brevitas, 0.10.2, UNKNOWN
certifi, 2024.8.30, Mozilla Public License 2.0 (MPL 2.0)
charset-normalizer, 3.3.2, MIT License
charset-normalizer, 3.4.0, MIT License
coloredlogs, 15.0.1, MIT License
concrete-python, 2.8.1, BSD-3-Clause
dependencies, 2.0.1, BSD License
dill, 0.3.8, BSD License
dill, 0.3.9, BSD License
filelock, 3.16.1, The Unlicense (Unlicense)
flatbuffers, 2.0.7, Apache Software License
fsspec, 2024.9.0, BSD License
huggingface-hub, 0.25.1, Apache Software License
huggingface-hub, 0.25.2, Apache Software License
humanfriendly, 10.0, MIT License
hummingbird-ml, 0.4.11, MIT License
idna, 3.10, BSD License
2 changes: 1 addition & 1 deletion deps_licenses/licenses_mac_silicon_user.txt.md5
@@ -1 +1 @@
ac76836858506534a0dc01cae9341f7d
8ea8aec4f5aac03565c2dcb9f3f8a1da
2 changes: 1 addition & 1 deletion docs/deep-learning/fhe_assistant.md
@@ -77,7 +77,7 @@ concrete_clf.compile(X, debug_config)

#### 3. Quantization import failed

**Error message**: `Error occurred during quantization aware training (QAT) import [...] Could not determine a unique scale for the quantization!`.
**Error message**: `Error occurred during quantization aware training (QAT) import [...] Are you missing a QuantIdentity layer in your Brevitas model?`.

**Cause**: This error occurs when a model imported as a quantization-aware training (QAT) model lacks quantization operators. See [this guide](../deep-learning/fhe_friendly_models.md) on how to use Brevitas layers. The message indicates that some layers do not take inputs quantized through `QuantIdentity` layers.
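As an illustration (editor's sketch, not part of the documentation diff), the fix the new error message suggests is to insert a `QuantIdentity` layer before each quantized linear layer, so every input is uniformly quantized. The module below mirrors the `FCSmall` example from the prediction guide; all names are hypothetical.

```python
import torch
import torch.nn as nn
import brevitas.nn as qnn

class QATSketch(nn.Module):
    """Hypothetical QAT model: every QuantLinear input passes through QuantIdentity."""

    def __init__(self, n_features):
        super().__init__()
        self.quant_in = qnn.QuantIdentity(bit_width=3)   # quantize the model input
        self.fc1 = qnn.QuantLinear(n_features, n_features, weight_bit_width=3, bias=True)
        self.quant_mid = qnn.QuantIdentity(bit_width=3)  # re-quantize after the non-linearity
        self.fc2 = qnn.QuantLinear(n_features, n_features, weight_bit_width=3, bias=True)

    def forward(self, x):
        return self.fc2(self.quant_mid(torch.relu(self.fc1(self.quant_in(x)))))
```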

3 changes: 2 additions & 1 deletion docs/guides/prediction_with_fhe.md
@@ -112,11 +112,12 @@ class FCSmall(nn.Module):
super().__init__()
self.quant_input = qnn.QuantIdentity(bit_width=3)
self.fc1 = qnn.QuantLinear(in_features=input_output, out_features=input_output, weight_bit_width=3, bias=True)
self.quant_2 = qnn.QuantIdentity(bit_width=3)
self.act_f = nn.ReLU()
self.fc2 = qnn.QuantLinear(in_features=input_output, out_features=input_output, weight_bit_width=3, bias=True)

def forward(self, x):
return self.fc2(self.act_f(self.fc1(self.quant_input(x))))
return self.fc2(self.quant_2(self.act_f(self.fc1(self.quant_input(x)))))

torch_model = FCSmall(3)

1,026 changes: 590 additions & 436 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions pyproject.toml
@@ -39,6 +39,9 @@ python = ">=3.8.1,<3.12"
# https://python-poetry.org/docs/1.7/repositories#project-configuration
# concrete-python = {version="==2.7.0", source = "zama-pypi-cpu"}
concrete-python = {version="==2.8.1", source = "zama-pypi-cpu"}
concrete-ml-extensions = [
{version = "0.1.2", platform = "linux" }
]
setuptools = "65.6.3"
skops = {version = "0.5.0"}
xgboost = "1.6.2"
@@ -152,6 +155,7 @@ filterwarnings = [
"ignore:You are using `torch.load`*",
"ignore:open_text is deprecated.*:DeprecationWarning",
"ignore:read_text is deprecated.*:DeprecationWarning",
"ignore:open_binary is deprecated.*:DeprecationWarning",
]

[tool.semantic_release]
3 changes: 2 additions & 1 deletion script/make_utils/licenses.sh
@@ -161,6 +161,7 @@ then
# And check with a white-list
# Brevitas has an "UNKNOWN" license, but is actually a BSD, so it is ignored in this test
# pkg-resources reports UNKNOWN due to a Ubuntu bug, but is Apache - ignore
# concrete-ml-extensions has the same license as Concrete ML, so skip checking
LICENSES_WHITELIST="new BSD 3-Clause"
LICENSES_WHITELIST="${LICENSES_WHITELIST};3-Clause BSD License"
LICENSES_WHITELIST="${LICENSES_WHITELIST};new BSD"
@@ -181,7 +182,7 @@ then
LICENSES_WHITELIST="${LICENSES_WHITELIST};ISC License (ISCL)"
LICENSES_WHITELIST="${LICENSES_WHITELIST};The Unlicense (Unlicense)"

pip-licenses --allow-only="${LICENSES_WHITELIST}" --ignore-packages brevitas pkg-resources pkg_resources concrete-ml-extensions-brevitas
pip-licenses --allow-only="${LICENSES_WHITELIST}" --ignore-packages brevitas pkg-resources pkg_resources concrete-ml-extensions

deactivate

11 changes: 11 additions & 0 deletions src/concrete/ml/common/utils.py
@@ -105,6 +105,17 @@ def is_valid(fhe: Union["FheMode", str]) -> bool:
return fhe in FheMode.__members__.values()


class HybridFHEMode(enum.Enum):
"""Simple enum for different modes of execution of HybridModel."""

DISABLE = "disable" # Use torch weights
REMOTE = "remote" # Use remote FHE server
SIMULATE = "simulate" # Use FHE simulation
CALIBRATE = "calibrate" # Use calibration (to run before FHE compilation)
EXECUTE = "execute" # Use FHE execution
TORCH = "torch" # Use torch layers


def replace_invalid_arg_name_chars(arg_name: str) -> str:
"""Sanitize arg_name, replacing invalid chars by _.
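As a usage note (editor's sketch, not part of the diff): `HybridFHEMode` is a plain string-valued `enum.Enum`, so execution modes can be parsed directly from user-facing strings and compared by identity.

```python
from concrete.ml.common.utils import HybridFHEMode

# String-valued enum: parse a mode from configuration input
mode = HybridFHEMode("simulate")
assert mode is HybridFHEMode.SIMULATE

# Hypothetical dispatch on the execution mode
if mode is HybridFHEMode.DISABLE:
    print("running with plain torch weights")
elif mode is HybridFHEMode.SIMULATE:
    print("running with FHE simulation")
else:
    print(f"mode: {mode.value}")
```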
24 changes: 13 additions & 11 deletions src/concrete/ml/pytest/torch_models.py
@@ -63,11 +63,13 @@ def forward(self, inputs):
class FCSmall(nn.Module):
"""Torch model for the tests."""

def __init__(self, input_output, activation_function):
def __init__(self, input_output, activation_function, hidden=None):
super().__init__()
self.fc1 = nn.Linear(in_features=input_output, out_features=input_output)

hidden_size = input_output if hidden is None else hidden
self.fc1 = nn.Linear(in_features=input_output, out_features=hidden_size)
self.act_f = activation_function()
self.fc2 = nn.Linear(in_features=input_output, out_features=input_output)
self.fc2 = nn.Linear(in_features=hidden_size, out_features=input_output)

def forward(self, x):
"""Forward pass.
@@ -850,7 +852,7 @@ def forward(self, x):
return x


class SimpleQAT(nn.Module):
class StepFunctionPTQ(nn.Module):
"""Torch model implements a step function that needs Greater, Cast and Where."""

def __init__(self, input_output, activation_function, n_bits=2, disable_bit_check=False):
@@ -1354,17 +1356,17 @@ def __init__(
super().__init__()

self.n_blocks = n_blocks
self.quant_1 = qnn.QuantIdentity(bit_width=n_bits, return_quant_tensor=True)
self.quant_1 = qnn.QuantIdentity(bit_width=n_bits, return_quant_tensor=False)
self.fc1 = qnn.QuantLinear(input_shape, hidden_shape, bias=False, weight_bit_width=n_bits)

self.quant_concat = qnn.QuantIdentity(bit_width=n_bits, return_quant_tensor=True)
self.quant_concat = qnn.QuantIdentity(bit_width=n_bits, return_quant_tensor=False)

self.quant_2 = qnn.QuantIdentity(bit_width=n_bits, return_quant_tensor=True)
self.quant_2 = qnn.QuantIdentity(bit_width=n_bits, return_quant_tensor=False)
self.fc2 = qnn.QuantLinear(
hidden_shape * self.n_blocks, hidden_shape, bias=True, weight_bit_width=n_bits
)

self.quant_3 = qnn.QuantIdentity(bit_width=n_bits, return_quant_tensor=True)
self.quant_3 = qnn.QuantIdentity(bit_width=n_bits, return_quant_tensor=False)
self.fc4 = qnn.QuantLinear(hidden_shape, output_shape, bias=True, weight_bit_width=n_bits)

def forward(self, x):
@@ -1379,9 +1381,9 @@ def forward(self, x):
x_pre = []

for i in range(self.n_blocks):
x_block = x[:, i, :]
q1_out = self.quant_1(x_block)
fc1_out = self.fc1(q1_out)
q_x = self.quant_1(x)
q_x_block = q_x[:, i, :]
fc1_out = self.fc1(q_x_block)
q_concat_out = self.quant_concat(fc1_out)

x_pre.append(q_concat_out)
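The `forward` refactor above moves quantization before the block slicing; a short sketch of the pattern (editor's illustration with made-up shapes):

```python
import torch
import brevitas.nn as qnn

n_blocks, n_features = 3, 4
quant = qnn.QuantIdentity(bit_width=4)
x = torch.randn(2, n_blocks, n_features)

# Quantize the full tensor once, then slice: every block shares the same
# quantization parameters and the traced graph has a single quantizer node.
q_x = quant(x)
blocks = [q_x[:, i, :] for i in range(n_blocks)]
```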
75 changes: 60 additions & 15 deletions src/concrete/ml/quantization/base_quantized_op.py
@@ -18,6 +18,7 @@
QuantizationOptions,
QuantizedArray,
UniformQuantizationParameters,
UniformQuantizer,
)

# pylint: disable=too-many-lines
@@ -559,7 +560,10 @@ def _prepare_quantized_input(self, input_: QuantizedArray) -> QuantizedArray:
# but when parsing the ONNX graph, some options can be overwritten. Thus
# when evaluating QAT layers we ignore one of these options to allow the
# override.
if quant_opts.is_equal(input_.quantizer.quant_options, ignore_sign_qat=True):
if (
quant_opts.is_equal(input_.quantizer.quant_options, ignore_sign_qat=True)
or input_.quantizer.quant_options.is_precomputed_qat
):
# Pass-through the input quantizer when the input is already quantized in
# the manner that this op requires: this makes the op use the qvalues directly,
# in q_impl and will avoid a TLU to re-quantize.
@@ -661,7 +665,9 @@ def _prepare_inputs_with_constants(
elif calibrate or is_clear_value:
# This is used during calibration with numpy.ndarrays
# or when the input is raw (not quantized)
prepared_inputs[curr_input_fill_idx] = input_
prepared_inputs[curr_input_fill_idx] = (
input_.values if isinstance(input_, QuantizedArray) else input_
)
elif quantize_actual_values:
# This is used by mixing (conv/gemm) or value re-arranging ops (reshape)
input_ = cast(QuantizedArray, input_)
@@ -674,9 +680,6 @@ def _prepare_inputs_with_constants(
new_input.quantizer.is_qat
and not input_.quantizer.is_precomputed_qat
and self.error_tracker is not None
and not new_input.quantizer.check_is_uniform_quantized(
new_input.quantizer.quant_options
)
):
self.error_tracker.append(input_idx)

@@ -700,7 +703,7 @@

return prepared_inputs

def calibrate(self, *inputs: numpy.ndarray) -> numpy.ndarray:
def calibrate(self, *inputs: Union[QuantizedArray, numpy.ndarray]) -> numpy.ndarray:
"""Create corresponding QuantizedArray for the output of the activation function.
Args:
Expand All @@ -712,6 +715,8 @@ def calibrate(self, *inputs: numpy.ndarray) -> numpy.ndarray:

# Here we need the actual values of the constants, we need to pass through
# the numpy.ndarrays in the computation graph
# Mixing ops may be calibrated using QuantizedArray inputs, in order
# to pre-compute analytical output quantization
prepared_inputs = self._prepare_inputs_with_constants(
*inputs, calibrate=True, quantize_actual_values=False
)
Expand All @@ -720,12 +725,48 @@ def calibrate(self, *inputs: numpy.ndarray) -> numpy.ndarray:
if isinstance(raw_result, RawOpOutput):
return raw_result

quantized_samples = QuantizedArray(self.n_bits, raw_result)
# If the caller passes only QuantizedArray it means
# that they are asking to quantize using analytical
# formulas
requested_analytical_quant = all(
isinstance(qv, QuantizedArray) for qv in inputs
) and isinstance(self, QuantizedMixingOp)
if requested_analytical_quant:
assert_true(
self.supported_by_linear_backend(),
"Calibration using QuantizedArray is only possible"
" for operations that can calibrate analytically",
)
q_prepared_inputs = self._prepare_inputs_with_constants(
*inputs, calibrate=False, quantize_actual_values=True
)
quantizer = self.calibrate_analytical_output(*q_prepared_inputs)
self.output_quant_params = quantizer.quant_params
self.output_quant_stats = quantizer.quant_stats
else:
# These output quantization parameters are only used
# for non-linear operations that produce graph outputs
quantized_samples = QuantizedArray(self.n_bits, raw_result)

self.output_quant_params = quantized_samples.quantizer.quant_params
self.output_quant_stats = quantized_samples.quantizer.quant_stats

return quantized_samples.values
return raw_result

def calibrate_analytical_output(self, *inputs: QuantizedArray) -> UniformQuantizer:
"""Calibrate output quantization based on analytical formulas.
Args:
*inputs (QuantizedArray): quantized operation inputs. Quantized weights
are stored in the op instance
Raises:
AssertionError: if the operation does not support analytical calibration
"""
raise AssertionError(
f"calibrate_analytical_output: not implemented for {self._impl_for_op_named} op"
)
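For context (editor's sketch, not the library implementation): "analytical" calibration means deriving a linear op's output quantizer from its input and weight quantizers instead of observing calibration outputs. With uniform quantization x ≈ s_x * (q_x - z_x) and w ≈ s_w * (q_w - z_w), the integer accumulator of a matrix product satisfies y ≈ (s_x * s_w) * acc, so:

```python
# Hypothetical helper: output quantization of a Gemm/matmul accumulator.
# acc = sum((q_x - z_x) * (q_w - z_w))  =>  y ≈ (s_x * s_w) * acc
def analytical_output_scale(input_scale: float, weight_scale: float) -> float:
    # The integer accumulation is exact: scales multiply, zero point is 0
    return input_scale * weight_scale
```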

def prepare_output(self, qoutput_activation: numpy.ndarray) -> QuantizedArray:
"""Quantize the output of the activation function.
@@ -817,6 +858,15 @@ def _get_output_quant_opts(self):
output_quant_opts.is_qat = False
return output_quant_opts

@classmethod
def supported_by_linear_backend(cls) -> bool:
"""Indicate if this op can be executed on the GLWE linear backend.
Returns:
bool: True if the op can be executed with GLWE.
"""
return False
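A hypothetical subclass override (editor's sketch that ignores the op-registration arguments real `QuantizedOp` subclasses pass): a linear op such as Gemm/MatMul would return `True` here to opt in to GLWE execution.

```python
class QuantizedLinearOpSketch(QuantizedOp):
    """Hypothetical op whose integer computation is a pure matrix product."""

    @classmethod
    def supported_by_linear_backend(cls) -> bool:
        # Dot products can be evaluated homomorphically with GLWE ciphertexts
        return True
```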


class QuantizedOpUnivariateOfEncrypted(QuantizedOp, is_utility=True):
"""An univariate operator of an encrypted value.
@@ -931,11 +981,6 @@ def make_output_quant_parameters(
Returns:
QuantizedArray: the quantized array that will be passed to the QuantizedModule output.
"""

out_opts = self._get_output_quant_opts()
out_opts.is_signed = False
out_opts.is_symmetric = False

# Since we don't know the real bit-width of these quantized values,
# return a quantizer that has zero offset
out_params = UniformQuantizationParameters(