diff --git a/.github/workflows/test_cli_cpu_neural_compressor.yaml b/.github/workflows/test_cli_cpu_neural_compressor.yaml
deleted file mode 100644
index 435f4216..00000000
--- a/.github/workflows/test_cli_cpu_neural_compressor.yaml
+++ /dev/null
@@ -1,51 +0,0 @@
-name: CLI CPU Intel Neural Compressor Tests
-
-on:
-  workflow_dispatch:
-  push:
-    branches:
-      - main
-  pull_request:
-    branches:
-      - main
-    types:
-      - opened
-      - reopened
-      - synchronize
-      - labeled
-      - unlabeled
-
-concurrency:
-  cancel-in-progress: true
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
-
-jobs:
-  run_cli_cpu_neural_compressor_tests:
-    if: ${{
-      (github.event_name == 'push') ||
-      (github.event_name == 'workflow_dispatch') ||
-      contains( github.event.pull_request.labels.*.name, 'cli') ||
-      contains( github.event.pull_request.labels.*.name, 'cpu') ||
-      contains( github.event.pull_request.labels.*.name, 'neural_compressor') ||
-      contains( github.event.pull_request.labels.*.name, 'cli_cpu_neural_compressor')
-      }}
-
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Set up Python 3.10
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.10"
-
-      - name: Install requirements
-        run: |
-          pip install --upgrade pip
-          pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
-          pip install -e .[testing,neural-compressor,diffusers,timm]
-
-      - name: Run tests
-        run: pytest tests/test_cli.py -s -k "cli and cpu and neural_compressor"
diff --git a/optimum_benchmark/__init__.py b/optimum_benchmark/__init__.py
index 313fb22a..7be4c4c3 100644
--- a/optimum_benchmark/__init__.py
+++ b/optimum_benchmark/__init__.py
@@ -1,9 +1,7 @@
 from .backends import (
     BackendConfig,
-    INCConfig,
     IPEXConfig,
     LlamaCppConfig,
-    LLMSwarmConfig,
     ORTConfig,
     OVConfig,
     PyTorchConfig,
@@ -26,10 +24,8 @@
     "EnergyStarConfig",
     "InferenceConfig",
     "IPEXConfig",
-    "INCConfig",
     "InlineConfig",
     "LauncherConfig",
-    "LLMSwarmConfig",
     "ORTConfig",
     "OVConfig",
     "ProcessConfig",
diff --git a/optimum_benchmark/backends/__init__.py b/optimum_benchmark/backends/__init__.py
index ec146f0b..2019270a 100644
--- a/optimum_benchmark/backends/__init__.py
+++ b/optimum_benchmark/backends/__init__.py
@@ -1,8 +1,6 @@
 from .config import BackendConfig
 from .ipex.config import IPEXConfig
 from .llama_cpp.config import LlamaCppConfig
-from .llm_swarm.config import LLMSwarmConfig
-from .neural_compressor.config import INCConfig
 from .onnxruntime.config import ORTConfig
 from .openvino.config import OVConfig
 from .py_txi.config import PyTXIConfig
@@ -18,9 +16,7 @@
     "OVConfig",
     "TorchORTConfig",
     "TRTLLMConfig",
-    "INCConfig",
     "PyTXIConfig",
-    "LLMSwarmConfig",
     "BackendConfig",
     "VLLMConfig",
     "LlamaCppConfig",
diff --git a/optimum_benchmark/backends/llm_swarm/__init__.py b/optimum_benchmark/backends/llm_swarm/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/optimum_benchmark/backends/llm_swarm/backend.py b/optimum_benchmark/backends/llm_swarm/backend.py
deleted file mode 100644
index 8139e4ea..00000000
--- a/optimum_benchmark/backends/llm_swarm/backend.py
+++ /dev/null
@@ -1,85 +0,0 @@
-import asyncio
-from typing import Any, Dict, List
-
-import torch
-from huggingface_hub import AsyncInferenceClient
-from llm_swarm import LLMSwarm
-from llm_swarm import LLMSwarmConfig as LLMSwarmCfg
-
-from ...task_utils import TEXT_GENERATION_TASKS
-from ..base import Backend
-from .config import LLMSwarmConfig
-
-
-class LLMSwarmBackend(Backend[LLMSwarmConfig]):
-    NAME: str = "llm-swarm"
-
-    def __init__(self, config: LLMSwarmConfig) -> None:
-        super().__init__(config)
-
-        if self.config.task not in TEXT_GENERATION_TASKS:
-            raise NotImplementedError(f"LLM Swarm does not support task {self.config.task}")
-
-    def load(self) -> None:
-        self.logger.info("\t+ Downloading pretrained model")
-        self.download_pretrained_model()
-        self.logger.info("\t+ Preparing generation config")
-        self.prepare_generation_config()
-        self.logger.info("\t+ Loading pretrained model")
-        self.load_model_from_pretrained()
-
-    def load_model_from_pretrained(self) -> None:
-        self.llm_swarm_config = LLMSwarmCfg(
-            gpus=self.config.gpus,
-            model=self.config.model,
-            instances=self.config.instances,
-            inference_engine=self.config.inference_engine,
-            slurm_template_path=self.config.slurm_template_path,
-            load_balancer_template_path=self.config.load_balancer_template_path,
-            per_instance_max_parallel_requests=self.config.per_instance_max_parallel_requests,
-            revision=self.config.model_kwargs.get("revision", "main"),
-            debug_endpoint=self.config.debug_endpoint,
-        )
-        self.llm_swarm = LLMSwarm(self.llm_swarm_config).__enter__()
-        self.client = AsyncInferenceClient(self.llm_swarm.endpoint)
-
-    def download_pretrained_model(self) -> None:
-        with torch.device("meta"):
-            self.auto_model_loader.from_pretrained(self.config.model, **self.config.model_kwargs)
-
-    def prepare_generation_config(self) -> None:
-        self.generation_config.eos_token_id = -100
-        self.generation_config.pad_token_id = -100
-
-        model_cache_folder = f"models/{self.config.model}".replace("/", "--")
-        model_cache_path = f"{self.config.volume}/{model_cache_folder}"
-        snapshot_file = f"{model_cache_path}/refs/{self.config.model_kwargs.get('revision', 'main')}"
-        snapshot_ref = open(snapshot_file, "r").read().strip()
-        model_snapshot_path = f"{model_cache_path}/snapshots/{snapshot_ref}"
-        self.logger.info("\t+ Saving new pretrained generation config")
-        self.generation_config.save_pretrained(save_directory=model_snapshot_path)
-
-    def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
-        if "inputs" in inputs:
-            inputs = {"prompt": self.pretrained_processor.batch_decode(inputs["inputs"].tolist())}
-        elif "input_ids" in inputs:
-            inputs = {"prompt": self.pretrained_processor.batch_decode(inputs["input_ids"].tolist())}
-        else:
-            raise ValueError("inputs must contain either input_ids or inputs")
-
-        return inputs
-
-    async def single_client_call(self, prompt: str, kwargs: Dict[str, Any]) -> str:
-        return await self.client.text_generation(prompt, max_new_tokens=kwargs.get("max_new_tokens", 1))
-
-    async def batch_client_call(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> List[str]:
-        return await asyncio.gather(*(self.single_client_call(p, kwargs) for p in inputs["prompt"]))
-
-    def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> List[str]:
-        return asyncio.run(self.batch_client_call(inputs, kwargs))
-
-    def prefill(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> List[str]:
-        return asyncio.run(self.batch_client_call(inputs, kwargs))
-
-    def generate(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> List[str]:
-        return asyncio.run(self.batch_client_call(inputs, kwargs))
diff --git a/optimum_benchmark/backends/llm_swarm/config.py b/optimum_benchmark/backends/llm_swarm/config.py
deleted file mode 100644
index 745cdd3f..00000000
--- a/optimum_benchmark/backends/llm_swarm/config.py
+++ /dev/null
@@ -1,31 +0,0 @@
-from dataclasses import dataclass
-from typing import Optional
-
-from ...import_utils import llm_swarm_version
-from ..config import BackendConfig
-
-
-@dataclass
-class LLMSwarmConfig(BackendConfig):
-    name: str = "llm-swarm"
-    version: Optional[str] = llm_swarm_version()
-    _target_: str = "optimum_benchmark.backends.llm_swarm.backend.LLMSwarmBackend"
-
-    # optimum benchmark specific
-    no_weights: bool = False
-
-    # llm-swarm specific
-    gpus: int = 8
-    instances: int = 1
-    inference_engine: str = "tgi"
-    volume: str = "/fsx/ilyas/.cache"
-    per_instance_max_parallel_requests: int = 500
-    slurm_template_path: str = "/fsx/ilyas/swarm-templates/tgi_h100.template.slurm"
-    load_balancer_template_path: str = "/fsx/ilyas/swarm-templates/nginx.template.conf"
-    debug_endpoint: Optional[str] = None
-
-    def __post_init__(self):
-        super().__post_init__()
-
-        # so that downloaded artifacts are stored in the same place
-        self.hub_kwargs["cache_dir"] = self.volume
diff --git a/optimum_benchmark/backends/neural_compressor/__init__.py b/optimum_benchmark/backends/neural_compressor/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/optimum_benchmark/backends/neural_compressor/backend.py b/optimum_benchmark/backends/neural_compressor/backend.py
deleted file mode 100644
index c180a5ba..00000000
--- a/optimum_benchmark/backends/neural_compressor/backend.py
+++ /dev/null
@@ -1,151 +0,0 @@
-import os
-from collections import OrderedDict
-from tempfile import TemporaryDirectory
-from typing import Any, Dict
-
-import torch
-from hydra.utils import get_class
-from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion
-from optimum.intel.neural_compressor.quantization import INCQuantizer
-
-from ...generators.dataset_generator import DatasetGenerator
-from ..base import Backend
-from ..transformers_utils import fast_weights_init
-from .config import INCConfig
-from .utils import TASKS_TO_INCMODELS
-
-
-class INCBackend(Backend[INCConfig]):
-    NAME: str = "neural-compressor"
-
-    def __init__(self, config: INCConfig):
-        super().__init__(config)
-
-        if self.config.task in TASKS_TO_INCMODELS:
-            self.incmodel_class = get_class(TASKS_TO_INCMODELS[self.config.task])
-            self.logger.info(f"Using INCModel class {self.incmodel_class.__name__}")
-        else:
-            raise NotImplementedError(f"INCBackend does not support task {self.config.task}")
-
-    def load(self) -> None:
-        self.logger.info("\t+ Creating backend temporary directory")
-        self.tmpdir = TemporaryDirectory()
-
-        if self.config.ptq_quantization:
-            if self.config.no_weights:
-                self.logger.info("\t+ Creating no weights AutoModel")
-                self.create_no_weights_model()
-                self.logger.info("\t+ Loading no weights AutoModel")
-                self.load_automodel_with_no_weights()
-            else:
-                self.logger.info("\t+ Loading pretrained AutoModel")
-                self.load_automodel_from_pretrained()
-            self.logger.info("\t+ Applying post-training quantization")
-            self.quantize_automodel()
-            self.logger.info("\t+ Loading quantized INCModel")
-            original_model, self.config.model = self.config.model, self.quantized_model
-            self.load_incmodel_from_pretrained()
-            self.config.model = original_model
-        elif self.config.no_weights:
-            self.logger.info("\t+ Creating no weights INCModel")
-            self.create_no_weights_model()
-            self.logger.info("\t+ Loading no weights INCModel")
-            self.load_incmodel_with_no_weights()
-        else:
-            self.logger.info("\t+ Loading pretrained INCModel")
-            self.load_incmodel_from_pretrained()
-
-        self.tmpdir.cleanup()
-
-    def load_automodel_from_pretrained(self) -> None:
-        self.pretrained_model = self.automodel_loader.from_pretrained(self.config.model, **self.config.model_kwargs)
-
-    def load_automodel_with_no_weights(self) -> None:
-        original_model, self.config.model = self.config.model, self.no_weights_model
-
-        with fast_weights_init():
-            self.load_automodel_from_pretrained()
-
-        self.logger.info("\t+ Tying model weights")
-        self.pretrained_model.tie_weights()
-
-        self.config.model = original_model
-
-    def load_incmodel_from_pretrained(self) -> None:
-        self.pretrained_model = self.incmodel_class.from_pretrained(self.config.model, **self.config.model_kwargs)
-
-    def load_incmodel_with_no_weights(self) -> None:
-        original_model, self.config.model = self.config.model, self.no_weights_model
-
-        with fast_weights_init():
-            self.load_incmodel_from_pretrained()
-
-        self.logger.info("\t+ Tying model weights")
-        self.pretrained_model.model.tie_weights()
-
-        self.config.model = original_model
-
-    def create_no_weights_model(self) -> None:
-        self.no_weights_model = os.path.join(self.tmpdir.name, "no_weights_model")
-        self.logger.info("\t+ Creating no weights model directory")
-        os.makedirs(self.no_weights_model, exist_ok=True)
-        self.logger.info("\t+ Creating no weights model state dict")
-        state_dict = torch.nn.Linear(1, 1).state_dict()
-        self.logger.info("\t+ Saving no weights model pytorch_model.bin")
-        torch.save(state_dict, os.path.join(self.no_weights_model, "pytorch_model.bin"))
-        self.logger.info("\t+ Saving no weights model pretrained config")
-        self.pretrained_config.save_pretrained(save_directory=self.no_weights_model)
-
-    def quantize_automodel(self) -> None:
-        self.quantized_model = f"{self.tmpdir.name}/quantized_model"
-        self.logger.info("\t+ Processing quantization config")
-        ptq_quantization_config = self.config.ptq_quantization_config.copy()
-        ptq_quantization_config["accuracy_criterion"] = AccuracyCriterion(
-            **ptq_quantization_config["accuracy_criterion"]
-        )
-        ptq_quantization_config["tuning_criterion"] = TuningCriterion(**ptq_quantization_config["tuning_criterion"])
-        ptq_quantization_config = PostTrainingQuantConfig(**ptq_quantization_config)
-        self.logger.info("\t+ Creating quantizer")
-        quantizer = INCQuantizer.from_pretrained(
-            model=self.pretrained_model,
-            task=self.config.task,
-            seed=self.config.seed,
-            # TODO: add support for these
-            calibration_fn=None,
-            eval_fn=None,
-        )
-
-        if self.config.calibration:
-            self.logger.info("\t+ Generating calibration dataset")
-            dataset_shapes = {"dataset_size": 1, "sequence_length": 1, **self.model_shapes}
-            calibration_dataset = DatasetGenerator(
-                task=self.config.task, dataset_shapes=dataset_shapes, model_shapes=self.model_shapes
-            )()
-            columns_to_be_removed = list(set(calibration_dataset.column_names) - set(quantizer._signature_columns))
-            calibration_dataset = calibration_dataset.remove_columns(columns_to_be_removed)
-        else:
-            calibration_dataset = None
-
-        self.logger.info("\t+ Quantizing model")
-        quantizer.quantize(
-            save_directory=self.quantized_model,
-            calibration_dataset=calibration_dataset,
-            quantization_config=ptq_quantization_config,
-            # TODO: add support for these
-            remove_unused_columns=True,
-            data_collator=None,
-            file_name=None,
-            batch_size=1,
-        )
-
-    @torch.inference_mode()
-    def forward(self, input: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict:
-        return self.pretrained_model(**input, **kwargs)
-
-    @torch.inference_mode()
-    def prefill(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict:
-        return self.pretrained_model.generate(**inputs, **kwargs)
-
-    @torch.inference_mode()
-    def generate(self, input: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict:
-        return self.pretrained_model.generate(**input, **kwargs)
diff --git a/optimum_benchmark/backends/neural_compressor/config.py b/optimum_benchmark/backends/neural_compressor/config.py
deleted file mode 100644
index 8aea5964..00000000
--- a/optimum_benchmark/backends/neural_compressor/config.py
+++ /dev/null
@@ -1,71 +0,0 @@
-from dataclasses import dataclass, field
-from typing import Any, Dict, Optional
-
-from omegaconf import OmegaConf
-
-from ...import_utils import neural_compressor_version
-from ..config import BackendConfig
-
-# https://github.com/intel/neural-compressor/blob/master/neural_compressor/config.py#L490
-ACCURACY_CRITERION_CONFIG = {"higher_is_better": True, "criterion": "relative", "tolerable_loss": 0.01}
-
-# https://github.com/intel/neural-compressor/blob/master/neural_compressor/config.py#L593
-TUNING_CRITERION_CONFIG = {
-    "strategy": "basic",
-    "strategy_kwargs": None,
-    "timeout": 0,
-    "max_trials": 100,
-    "objective": "performance",
-}
-
-# https://github.com/intel/neural-compressor/blob/master/neural_compressor/config.py#L1242
-PTQ_QUANTIZATION_CONFIG = {
-    "device": "cpu",
-    "backend": "default",
-    "domain": "auto",
-    "recipes": {},
-    "quant_format": "default",
-    "inputs": [],
-    "outputs": [],
-    "approach": "static",
-    "calibration_sampling_size": [100],
-    "op_type_dict": None,
-    "op_name_dict": None,
-    "reduce_range": None,
-    "example_inputs": None,
-    "excluded_precisions": [],
-    "quant_level": "auto",
-    "accuracy_criterion": ACCURACY_CRITERION_CONFIG,
-    "tuning_criterion": TUNING_CRITERION_CONFIG,
-}
-
-
-@dataclass
-class INCConfig(BackendConfig):
-    name: str = "neural-compressor"
-    version: Optional[str] = neural_compressor_version()
-    _target_: str = "optimum_benchmark.backends.neural_compressor.backend.INCBackend"
-
-    # load options
-    no_weights: bool = False
-
-    # post-training quantization options
-    ptq_quantization: bool = False
-    ptq_quantization_config: Dict[str, Any] = field(default_factory=dict)
-
-    # calibration options
-    calibration: bool = False
-    calibration_config: Dict[str, Any] = field(default_factory=dict)
-
-    def __post_init__(self):
-        super().__post_init__()
-
-        if self.device != "cpu":
-            raise ValueError(f"INCBackend only supports CPU devices, got {self.device}")
-
-        if self.ptq_quantization:
-            self.ptq_quantization_config = OmegaConf.to_object(
-                OmegaConf.merge(PTQ_QUANTIZATION_CONFIG, self.ptq_quantization_config)
-            )
-            if self.ptq_quantization_config["approach"] == "static" and not self.calibration:
-                raise ValueError("Calibration must be enabled when using static quantization.")
diff --git a/optimum_benchmark/backends/neural_compressor/utils.py b/optimum_benchmark/backends/neural_compressor/utils.py
deleted file mode 100644
index beb99977..00000000
--- a/optimum_benchmark/backends/neural_compressor/utils.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from optimum.intel.neural_compressor.utils import _HEAD_TO_AUTOMODELS
-
-TASKS_TO_INCMODELS = {
-    task: f"optimum.intel.neural_compressor.{incmodel_name}" for task, incmodel_name in _HEAD_TO_AUTOMODELS.items()
-}