diff --git a/.gitleaksignore b/.gitleaksignore
index 1356df088..a1c87598d 100644
--- a/.gitleaksignore
+++ b/.gitleaksignore
@@ -10,3 +10,4 @@ f41de03048a9ed27946b875e81b34138bb4bb17b:use_case_examples/training/analyze.ipyn
 e2904473898ddd325f245f4faca526a0e9520f49:builders/Dockerfile.zamalang-env:generic-api-key:5
 7d5e885816f1f1e432dd94da38c5c8267292056a:docs/advanced_examples/XGBRegressor.ipynb:aws-access-token:1026
 25c5e7abaa7382520af3fb7a64266e193b1f6a59:poetry.lock:square-access-token:6401
+eebd4bea78f6dd2361baa7f94f68ae4cba8b9fe8:tests/deployment/test_deployment.py:generic-api-key:20
\ No newline at end of file
diff --git a/docs/advanced_examples/LoraMLP.ipynb b/docs/advanced_examples/LoraMLP.ipynb
index af17b90fc..7b6dc6e7c 100644
--- a/docs/advanced_examples/LoraMLP.ipynb
+++ b/docs/advanced_examples/LoraMLP.ipynb
@@ -21,7 +21,7 @@
    {
     "data": {
      "text/plain": [
-      ""
+      ""
      ]
    },
    "execution_count": 1,
@@ -324,7 +324,7 @@
    "name": "stderr",
    "output_type": "stream",
    "text": [
-    "\r\n",
+    "\r",
     "Training: 0%| | 0/10 [00:00
diff --git a/src/concrete/ml/torch/hybrid_model.py b/src/concrete/ml/torch/hybrid_model.py
--- a/src/concrete/ml/torch/hybrid_model.py
+++ b/src/concrete/ml/torch/hybrid_model.py
@@ ... @@ def forward(self, x: torch.Tensor) -> Union[torch.Tensor, QuantTensor]:
         }:
             assert self.private_q_module is not None
-            try:
-                optimized_linear_layer_executor = _optimized_linear_executor.get()
-            except LookupError:
-                optimized_linear_layer_executor = None
-
-            if optimized_linear_layer_executor:
+            if self.executor:
                 # Delegate to the optimized GLWE executor
                 y = torch.Tensor(
-                    optimized_linear_layer_executor.forward(
+                    self.executor.forward(
                         x.detach().numpy(), self.private_q_module, self.fhe_local_mode
                     )
                 )
@@ -269,6 +257,7 @@ def forward(self, x: torch.Tensor) -> Union[torch.Tensor, QuantTensor]:
             y = torch.Tensor(
                 self.private_q_module.forward(x.detach().numpy(), fhe=self.fhe_local_mode.value)
             )
+
         elif self.fhe_local_mode == HybridFHEMode.CALIBRATE:
             # Calling torch + gathering calibration data
             assert self.private_module is not None
@@ -278,14 +267,8 @@ def forward(self, x: torch.Tensor) -> Union[torch.Tensor, QuantTensor]:
 
         elif self.fhe_local_mode == HybridFHEMode.REMOTE:  # pragma:no cover
             # Remote call
-            try:
-                optimized_linear_layer_executor = _optimized_linear_executor.get()
-            except LookupError:
-                optimized_linear_layer_executor = None
-
-            assert optimized_linear_layer_executor is None, (
-                "Remote optimized linear layers " "are not yet implemented"
-            )
+            # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4672
+            assert self.executor is None, "Remote optimized linear layers are not yet implemented"
             y = self.remote_call(x)
 
         elif self.fhe_local_mode == HybridFHEMode.TORCH:
@@ -400,13 +383,14 @@ def __init__(
         self.configuration: Optional[Configuration] = None
         self.model_name = model_name
         self.verbose = verbose
+        self.executor: Optional[GLWELinearLayerExecutor] = None
 
         self._replace_modules()
 
     def _replace_modules(self):
         """Replace the private modules in the model with remote layers."""
 
-        self._all_layers_are_pure_linear = True
+        self._has_only_large_linear_layers = True
         for module_name in self.module_names:
             # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/3858
             # Conv1d introduce reshaping operations which adds more TLU
@@ -421,8 +405,27 @@ def _replace_modules(self):
                 self.private_modules[module_name],
                 (nn.Linear, ForwardModuleLinear, BackwardModuleLinear),
             )
+
+            # Check input dimensions for linear layers
+            # If the input dimension is less than 512 we do not use the GLWE optimization.
+            # Optimal input dimension is 2048, below 512 the performance is too low.
+            if is_pure_linear_layer:
+                module = self.private_modules[module_name]
+                # Use weight shape instead of in/out_features
+                if hasattr(module, "weight"):
+                    input_dim = module.weight.shape[
+                        1
+                    ]  # Input dimension is second dimension for Linear layers
+                    output_dim = module.weight.shape[0]  # Output dimension is first dimension
+                else:
+                    input_dim = output_dim = 0
+
+                is_pure_linear_layer = (
+                    is_pure_linear_layer and input_dim >= 512 and output_dim >= 512
+                )
+
             if not is_pure_linear_layer:
-                self._all_layers_are_pure_linear = False
+                self._has_only_large_linear_layers = False
 
         for module_name in self.module_names:
             # Create the optimized glwe linear layer executor if needed
@@ -432,7 +435,7 @@ def _replace_modules(self):
                 module_name=module_name,
                 model_name=self.model_name,
                 verbose=self.verbose,
-                optimized_linear_execution=self._all_layers_are_pure_linear,
+                optimized_linear_execution=(self._has_only_large_linear_layers),
             )
             self.remote_modules[module_name] = remote_module
@@ -462,7 +465,7 @@ def forward(self, x: torch.Tensor, fhe: str = "disable") -> torch.Tensor:
         # Validate the FHE mode
         fhe_mode = HybridFHEMode(fhe)
 
-        if _HAS_GLWE_BACKEND and self._all_layers_are_pure_linear:
+        if _HAS_GLWE_BACKEND and self._has_only_large_linear_layers:
             if fhe_mode == HybridFHEMode.SIMULATE:
                 raise AssertionError(
                     "When the HybridFHEModel is instantiated with only "
@@ -470,23 +473,20 @@ def forward(self, x: torch.Tensor, fhe: str = "disable") -> torch.Tensor:
 
             if fhe_mode in (HybridFHEMode.EXECUTE, HybridFHEMode.REMOTE, HybridFHEMode.DISABLE):
-                # If all layers are pure linear, enable the GLWE optimization for all layers
-                # and generate an encryption and compression key for all layers
-                # as they share crypto-parameters
+                # Initialize executor only if not already done
+                if self.executor is None:
+                    self.executor = GLWELinearLayerExecutor()
 
-                # Loading keys from a file could be done here, and the
-                # keys could be passed as arguments to the Executor
-                executor = GLWELinearLayerExecutor()
+                # Generate keys only if needed and not already done
+                if fhe_mode != HybridFHEMode.DISABLE and self.executor.private_key is None:
+                    self.executor.keygen()
 
-                if fhe_mode != HybridFHEMode.DISABLE:
-                    executor.keygen()
-
-                _optimized_linear_executor.set(executor)
+                # Update executor for all remote modules
+                for module in self.remote_modules.values():
+                    module.executor = self.executor
 
         result = self.model(x)
 
-        _optimized_linear_executor.set(None)
-
         return result
 
     def __call__(self, x: torch.Tensor, fhe: str = "disable") -> torch.Tensor:
@@ -589,7 +589,8 @@ def compile_model(
             # If all layers are linear and the GLWE backend is available
             # then simply quantize the model without compiling with
             # Concrete Python.
-            if self._all_layers_are_pure_linear and _HAS_GLWE_BACKEND:
+            if self._has_only_large_linear_layers and _HAS_GLWE_BACKEND:
+                self.executor = GLWELinearLayerExecutor()
                 self.private_q_modules[name] = build_quantized_module(
                     self.private_modules[name],
                     calibration_data_tensor,
@@ -645,7 +646,15 @@ def save_and_clear_private_info(self, path: Path, via_mlir=True):
             path (Path): The directory where the model and the FHE circuit will be saved.
             via_mlir (bool): if fhe circuits should be serialized using via_mlir option
                 useful for cross-platform (compile on one architecture and run on another)
+
+        Raises:
+            NotImplementedError: GLWE backend deployment is not yet supported
         """
+        # FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4672
+        # GLWE backend deployment is not yet supported
+        if self.executor is not None:
+            raise NotImplementedError("GLWE backend deployment is not yet supported")
+
         path = Path(path)
         path.mkdir(parents=True, exist_ok=True)
diff --git a/tests/torch/test_hybrid_converter.py b/tests/torch/test_hybrid_converter.py
index 821903bcf..67af03037 100644
--- a/tests/torch/test_hybrid_converter.py
+++ b/tests/torch/test_hybrid_converter.py
@@ -1,5 +1,6 @@
 """Tests for the hybrid model converter."""
 
+import importlib
 import sys
 import tempfile
 from pathlib import Path
@@ -9,7 +10,6 @@
 import pytest
 import torch
 from concrete.fhe import Configuration
-from sklearn.datasets import make_moons
 from sklearn.model_selection import train_test_split
 from transformers import GPT2LMHeadModel, GPT2Tokenizer
@@ -65,15 +65,16 @@ def run_hybrid_llm_test(
     if has_pbs_reshape:
         has_pbs = True
 
-    # Propagate glwe_backend_installed state being tested to constants of affected modules
-    for affected_module in (
-        concrete.ml.quantization.linear_op_glwe_backend,
-        concrete.ml.torch.hybrid_model,
-    ):
-        m.setattr(affected_module, "_HAS_GLWE_BACKEND", glwe_backend_installed)
+    # Patching for GLWE backend
+    if not glwe_backend_installed:
+        m.setitem(sys.modules, "concrete_ml_extensions", None)
+
+        # Reload the affected modules to ensure the changes take effect
+        importlib.reload(concrete.ml.quantization.linear_op_glwe_backend)
+        importlib.reload(concrete.ml.torch.hybrid_model)
 
-    # Create a hybrid model
     hybrid_model = HybridFHEModel(model, module_names)
+    is_compiled = False
     try:
         hybrid_model.compile_model(
             inputs,
@@ -82,6 +83,7 @@ def run_hybrid_llm_test(
             rounding_threshold_bits=8,
             configuration=configuration,
         )
+        is_compiled = True
     except RuntimeError as error:
         # When reshaping adds PBSs we sometimes encounter NoParametersFound
         # when compiling. In this case we skip the rest since we can't simulate
@@ -154,10 +156,20 @@ def run_hybrid_llm_test(
         # Get the temp directory path
         if not has_pbs and glwe_backend_installed:
-            # Deployment of GLWE backend hybrid models is not yet supported
-            with pytest.raises(AttributeError, match="The quantized module is not compiled.*"):
-                hybrid_model.save_and_clear_private_info(temp_dir_path)
+            if is_compiled:
+                # Deployment of GLWE backend hybrid models is not yet supported
+                with pytest.raises(
+                    NotImplementedError, match="GLWE backend deployment is not yet supported"
+                ):
+                    hybrid_model.save_and_clear_private_info(temp_dir_path)
+            else:
+                # Check that we get an error when trying to save a non-compiled model
+                with pytest.raises(
+                    AttributeError,
+                    match="The quantized module is not compiled. Please run compile*",
+                ):
+                    hybrid_model.save_and_clear_private_info(temp_dir_path)
         else:
             hybrid_model.save_and_clear_private_info(temp_dir_path)
@@ -279,7 +291,7 @@ def test_invalid_model():
         HybridFHEModel(invalid_model, module_names="sub_module")
 
 
-@pytest.mark.parametrize("n_hidden", [512, 2048])
+@pytest.mark.parametrize("n_hidden", [256, 512, 2048])
 def test_hybrid_glwe_correctness(n_hidden):
     """Tests that the GLWE backend produces correct results for the hybrid model."""
@@ -295,13 +307,15 @@ def prepare_data(x, y, test_size=0.1, random_state=42):
         y_test = torch.tensor(y_test, dtype=torch.long)
         return x_train, x_test, y_train, y_test
 
-    # Generate synthetic 2D data
-    x1_data, y1_data = make_moons(n_samples=num_samples, noise=0.2, random_state=42)
+    # Generate random data with n_hidden features and n_hidden classes,
+    # keeping input and output dimensions equal to n_hidden.
+    x1_data = numpy.random.randn(num_samples, n_hidden)
+    y1_data = numpy.random.randint(0, n_hidden, size=num_samples)  # n_hidden classes
 
     # Prepare data
     x1_train, x1_test, y1_train, y1_test = prepare_data(x1_data, y1_data)
 
-    model = FCSmall(2, torch.nn.ReLU, hidden=n_hidden)
+    model = FCSmall(n_hidden, torch.nn.ReLU, hidden=n_hidden)
     optimizer = torch.optim.Adam(model.parameters())
 
     num_epochs = 100
@@ -325,38 +339,48 @@ def prepare_data(x, y, test_size=0.1, random_state=42):
 
     # This internal flag tells us whether all the layers
    # were linear and were replaced with the GLWE backend
-    assert hybrid_local._all_layers_are_pure_linear  # pylint: disable=protected-access
+    # Check if GLWE optimization should be used based on input dimension
+    should_use_glwe = n_hidden >= 512
+    is_pure_linear = hybrid_local._has_only_large_linear_layers  # pylint: disable=protected-access
+    assert is_pure_linear == should_use_glwe
 
     hybrid_local.compile_model(x1_train, n_bits=10)
 
     y_qm = hybrid_local(x1_test, fhe="disable").numpy()
     y_hybrid_torch = hybrid_local(x1_test, fhe="torch").detach().numpy()
-    y_glwe = hybrid_local(x1_test, fhe="execute").numpy()
-
-    y1_test = y1_test.numpy()
-    n_correct_fp32 = numpy.sum(numpy.argmax(y_torch, axis=1) == y1_test)
-    n_correct_qm = numpy.sum(numpy.argmax(y_qm, axis=1) == y1_test)
-    n_correct_glwe = numpy.sum(numpy.argmax(y_glwe, axis=1) == y1_test)
+    # Only test GLWE execution if input dimension is >= 512
+    if should_use_glwe:
+        y_glwe = hybrid_local(x1_test, fhe="execute").numpy()
+
+        y1_test = y1_test.numpy()
+        n_correct_fp32 = numpy.sum(numpy.argmax(y_torch, axis=1) == y1_test)
+        n_correct_qm = numpy.sum(numpy.argmax(y_qm, axis=1) == y1_test)
+        n_correct_glwe = numpy.sum(numpy.argmax(y_glwe, axis=1) == y1_test)
 
-    # These two should be exactly the same
-    assert numpy.all(numpy.allclose(y_torch, y_hybrid_torch, rtol=1, atol=0.001))
+        # These two should be exactly the same
+        assert numpy.all(numpy.allclose(y_torch, y_hybrid_torch, rtol=1, atol=0.001))
 
-    # The clear quantization vs fp32 test has more tolerance
-    threshold_fhe = 0.01
+        # The clear quantization vs fp32 test has more tolerance
+        threshold_fhe = 0.01
 
-    diff = numpy.abs(y_torch - y_glwe) > threshold_fhe
-    if numpy.any(diff):
-        print(f"Value discrepancy detected for GLWE backend, with epsilon={threshold_fhe}")
-        print("Model output (torch fp32)", y_torch[diff])
-        print("Model output (glwe)", y_glwe[diff])
-        print("Model output (quantized clear)", y_qm[diff])
+        diff = numpy.abs(y_torch - y_glwe) > threshold_fhe
+        if numpy.any(diff):
+            print(f"Value discrepancy detected for GLWE backend, with epsilon={threshold_fhe}")
+            print("Model output (torch fp32)", y_torch[diff])
+            print("Model output (glwe)", y_glwe[diff])
+            print("Model output (quantized clear)", y_qm[diff])
 
-    assert numpy.all(numpy.allclose(y_qm, y_glwe, rtol=1, atol=threshold_fhe))
-    assert numpy.all(numpy.allclose(y_torch, y_glwe, rtol=1, atol=threshold_fhe))
+        assert numpy.all(numpy.allclose(y_qm, y_glwe, rtol=1, atol=threshold_fhe))
+        assert numpy.all(numpy.allclose(y_torch, y_glwe, rtol=1, atol=threshold_fhe))
 
-    n_correct_delta_threshold_fhe = 1
-    # Check accuracy between fp32 and glwe
-    assert numpy.abs(n_correct_fp32 - n_correct_glwe) <= n_correct_delta_threshold_fhe
+        n_correct_delta_threshold_fhe = 1
+        # Check accuracy between fp32 and glwe
+        assert numpy.abs(n_correct_fp32 - n_correct_glwe) <= n_correct_delta_threshold_fhe
 
-    # Check accuracy between quantized and glwe
-    assert numpy.abs(n_correct_qm - n_correct_glwe) <= n_correct_delta_threshold_fhe
+        # Check accuracy between quantized and glwe
+        assert numpy.abs(n_correct_qm - n_correct_glwe) <= n_correct_delta_threshold_fhe
+    else:
+        # For non-GLWE cases, just verify the torch outputs match
+        assert numpy.all(numpy.allclose(y_torch, y_hybrid_torch, rtol=1, atol=0.001))
+        assert numpy.all(numpy.allclose(y_qm, y_hybrid_torch, rtol=1, atol=0.01))
diff --git a/use_case_examples/deployment/README.md b/use_case_examples/deployment/README.md
index 38baac25b..fed7b713e 100644
--- a/use_case_examples/deployment/README.md
+++ b/use_case_examples/deployment/README.md
@@ -7,26 +7,26 @@ This folder contains examples of how to deploy Concrete ML models using Fully Ho
 The deployment process generally follows these steps:
 
 1. Train the model (optional, depending on the use case)
-2. Compile the model to an FHE circuit
-3. Deploy the model using Docker
-4. Run inference using a client (locally or in Docker)
+1. Compile the model to an FHE circuit
+1. Deploy the model using Docker
+1. Run inference using a client (locally or in Docker)
 
 ## Available Examples
 
 We provide three different use cases to demonstrate the deployment process:
 
 1. [Breast Cancer Classification](./breast_cancer/README.md)
-2. [Sentiment Analysis](./sentiment_analysis/README.md)
-3. [CIFAR-10 Image Classification](./cifar/README.md)
+1. [Sentiment Analysis](./sentiment_analysis/README.md)
+1. [CIFAR-10 Image Classification](./cifar/README.md)
 
 ## Getting Started
 
 Each example folder contains its own README with specific instructions. However, the general process is similar:
 
 1. Train or compile the model using the provided scripts
-2. Deploy the model using `deploy_to_docker.py` from the `server` folder
-3. Build the client Docker image
-4. Run the client to interact with the deployed model
+1. Deploy the model using `deploy_to_docker.py` from the `server` folder
+1. Build the client Docker image
+1. Run the client to interact with the deployed model
 
 For detailed instructions, please refer to the README in each example folder.