diff --git a/src/concrete/ml/torch/hybrid_model.py b/src/concrete/ml/torch/hybrid_model.py
index 6af89f2d6..e8bbf6c34 100644
--- a/src/concrete/ml/torch/hybrid_model.py
+++ b/src/concrete/ml/torch/hybrid_model.py
@@ -267,6 +267,7 @@ def forward(self, x: torch.Tensor) -> Union[torch.Tensor, QuantTensor]:
         elif self.fhe_local_mode == HybridFHEMode.REMOTE:  # pragma:no cover
             # Remote call
+            # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4672
             assert self.executor is None, "Remote optimized linear layers are not yet implemented"
             y = self.remote_call(x)
@@ -410,8 +411,15 @@ def _replace_modules(self):
             # Optimal input dimension is 2048, below 512 the performance are too low.
             if is_pure_linear_layer:
                 module = self.private_modules[module_name]
-                input_dim = module.in_features if hasattr(module, "in_features") else 0
-                output_dim = module.out_features if hasattr(module, "out_features") else 0
+                # Use weight shape instead of in/out_features
+                if hasattr(module, "weight"):
+                    input_dim = module.weight.shape[
+                        1
+                    ]  # Input dimension is second dimension for Linear layers
+                    output_dim = module.weight.shape[0]  # Output dimension is first dimension
+                else:
+                    input_dim = output_dim = 0
+
                 is_pure_linear_layer = (
                     is_pure_linear_layer and input_dim >= 512 and output_dim >= 512
                 )
@@ -582,6 +590,7 @@ def compile_model(
            # then simply quantize the model without compiling with
            # Concrete Python.
            if self._has_only_large_linear_layers and _HAS_GLWE_BACKEND:
+                self.executor = GLWELinearLayerExecutor()
                self.private_q_modules[name] = build_quantized_module(
                    self.private_modules[name],
                    calibration_data_tensor,
@@ -637,7 +646,15 @@ def save_and_clear_private_info(self, path: Path, via_mlir=True):
             path (Path): The directory where the model and the FHE circuit will be saved.
             via_mlir (bool): if fhe circuits should be serialized using via_mlir option
                 useful for cross-platform (compile on one architecture and run on another)
+
+        Raises:
+            NotImplementedError: GLWE backend deployment is not yet supported
         """
+        # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4672
+        # GLWE backend deployment is not yet supported
+        if self.executor is not None:
+            raise NotImplementedError("GLWE backend deployment is not yet supported")
+
         path = Path(path)
         path.mkdir(parents=True, exist_ok=True)
diff --git a/tests/torch/test_hybrid_converter.py b/tests/torch/test_hybrid_converter.py
index de5724c6d..110d5d5b2 100644
--- a/tests/torch/test_hybrid_converter.py
+++ b/tests/torch/test_hybrid_converter.py
@@ -73,6 +73,7 @@ def run_hybrid_llm_test(
     # Create a hybrid model
     hybrid_model = HybridFHEModel(model, module_names)
+    is_compiled = False
     try:
         hybrid_model.compile_model(
             inputs,
@@ -81,6 +82,7 @@
             rounding_threshold_bits=8,
             configuration=configuration,
         )
+        is_compiled = True
     except RuntimeError as error:
         # When reshaping adds PBSs we sometimes encounter NoParametersFound
         # when compiling. In this case we skip the rest since we can't simulate
@@ -153,10 +155,20 @@
     # Get the temp directory path
     if not has_pbs and glwe_backend_installed:
-        # Deployment of GLWE backend hybrid models is not yet supported
-        with pytest.raises(AttributeError, match="The quantized module is not compiled.*"):
-            hybrid_model.save_and_clear_private_info(temp_dir_path)
+        if is_compiled:
+            # Deployment of GLWE backend hybrid models is not yet supported
+            with pytest.raises(
+                NotImplementedError, match="GLWE backend deployment is not yet supported"
+            ):
+                hybrid_model.save_and_clear_private_info(temp_dir_path)
+        else:
+            # Check that we get an error when trying to save a non-compiled model
+            with pytest.raises(
+                AttributeError,
+                match="The quantized module is not compiled. Please run compile*",
+            ):
+                hybrid_model.save_and_clear_private_info(temp_dir_path)
     else:
         hybrid_model.save_and_clear_private_info(temp_dir_path)
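For context on the `_replace_modules` change, here is a minimal, self-contained sketch (not part of the diff) of why the dimension check now reads `module.weight.shape` rather than `in_features`/`out_features`: a wrapped or custom linear module may not expose those attributes, so the old check silently fell back to 0 and disabled the GLWE fast path even for large layers. `CustomLinear` below is a hypothetical stand-in for such a module.

    import torch

    class CustomLinear(torch.nn.Module):
        """Behaves like a large nn.Linear but lacks in/out_features attributes."""

        def __init__(self, weight: torch.Tensor):
            super().__init__()
            self.weight = torch.nn.Parameter(weight)

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            return x @ self.weight.t()

    # Weight stored as (out_features, in_features), matching nn.Linear's layout
    module = CustomLinear(torch.randn(1024, 2048))

    # Old check: falls back to 0, so the >= 512 size test fails and the
    # GLWE fast path is skipped even though the layer is large enough
    old_input_dim = module.in_features if hasattr(module, "in_features") else 0
    assert old_input_dim == 0

    # New check: recovers the dimensions from the weight tensor itself
    if hasattr(module, "weight"):
        input_dim = module.weight.shape[1]   # second dim is the input size
        output_dim = module.weight.shape[0]  # first dim is the output size
    else:
        input_dim = output_dim = 0
    assert (input_dim, output_dim) == (2048, 1024)

Note that the `self.executor = GLWELinearLayerExecutor()` assignment added in `compile_model` is the same attribute that `save_and_clear_private_info` now inspects: a non-None executor marks the model as using the GLWE backend, for which deployment raises `NotImplementedError` until the linked issue is resolved.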