From 476e9399307b8f260548ee71d9a62f6da8eb7213 Mon Sep 17 00:00:00 2001 From: jfrery Date: Mon, 3 Jun 2024 15:34:39 +0200 Subject: [PATCH] chore: fix batchn norm per channel with clipping --- src/concrete/ml/quantization/quantized_ops.py | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/src/concrete/ml/quantization/quantized_ops.py b/src/concrete/ml/quantization/quantized_ops.py index a275e04a59..a89c7c9eb7 100644 --- a/src/concrete/ml/quantization/quantized_ops.py +++ b/src/concrete/ml/quantization/quantized_ops.py @@ -1612,6 +1612,49 @@ class QuantizedBatchNormalization(QuantizedOp): _impl_for_op_named: str = "BatchNormalization" + def calibrate(self, *inputs: numpy.ndarray) -> numpy.ndarray: + """Create corresponding QuantizedArray for the output of the activation function. + + Args: + *inputs (numpy.ndarray): Calibration sample inputs. + + Returns: + numpy.ndarray: the output values for the provided calibration samples. + """ + + # Here we need the actual values of the constants, we need to pass through + # the numpy.ndarrays in the computation graph + prepared_inputs = self._prepare_inputs_with_constants( + *inputs, calibrate=True, quantize_actual_values=False + ) + + raw_result = self.call_impl(*prepared_inputs, **self.attrs) + if isinstance(raw_result, RawOpOutput): + return raw_result + + # Check if batch normalization is applied per channel + scale = self.constant_inputs[self._params_name_to_input_idx["scale"]].values + bias = self.constant_inputs[self._params_name_to_input_idx["bias"]].values + if scale.size > 1 or bias.size > 1: + # Per channel batchnorm behave poorly with low bit-width quantization. + # This is because per channel batchnorm can have paramters (scale / bias / mean / var) + # order of magnitude different between channels. But our tensor based quantization + # only allows us to provide a global scale and offset. Any error in quantized values + # could be dramatic. + # To avoid this, we use percentiles to clip the extreme values. + + lower_bound = numpy.percentile(raw_result, 0.1) + upper_bound = numpy.percentile(raw_result, 99.9) + + raw_result = numpy.clip(raw_result, lower_bound, upper_bound) + + quantized_samples = QuantizedArray(self.n_bits, raw_result) + + self.output_quant_params = quantized_samples.quantizer.quant_params + self.output_quant_stats = quantized_samples.quantizer.quant_stats + + return quantized_samples.values + class QuantizedFlatten(QuantizedOp): """Quantized flatten for encrypted inputs."""