diff --git a/docs/docsutils.py b/docs/docsutils.py
new file mode 100644
index 0000000..c78fab7
--- /dev/null
+++ b/docs/docsutils.py
@@ -0,0 +1,11 @@
+from __future__ import annotations
+
+from io import StringIO
+
+from matplotlib.figure import Figure
+
+
+def fig_to_html(fig: Figure) -> str:
+    buffer = StringIO()
+    fig.savefig(buffer, format="svg")
+    return buffer.getvalue()
diff --git a/docs/environment.yml b/docs/environment.yml
new file mode 100644
index 0000000..52d7743
--- /dev/null
+++ b/docs/environment.yml
@@ -0,0 +1,19 @@
+name: readthedocs
+channels:
+  - defaults
+dependencies:
+  - python=3.10
+  - python-graphviz
+  - pip
+  - pip:
+    - markdown-exec
+    - mkdocs-exclude
+    - mkdocs-jupyter
+    - mkdocs-material
+    - mkdocs-section-index==0.3.6
+    - mkdocs==1.5.2
+    - mkdocstrings
+    - mkdocstrings-python
+    - -e ../
+    - pulser>=0.12.0
+    - amazon-braket-sdk
diff --git a/docs/qinfo_tools/qng.md b/docs/qinfo_tools/qng.md
new file mode 100644
index 0000000..380dd82
--- /dev/null
+++ b/docs/qinfo_tools/qng.md
@@ -0,0 +1,184 @@
+# The Quantum Natural Gradient optimizer
+
+Qadence-libs provides a set of optimizers based on quantum information tools, in particular based on the Quantum Fisher Information[^1] (QFI). The Quantum Natural Gradient [^2] (QNG) is a gradient-based optimizer which uses the QFI matrix to better navigate the optimizer's descent to the minimum. The parameter update rule for the QNG optimizer is written as:
+
+$$
+\theta_{t+1} = \theta_t - \eta g^{-1}(\theta_t)\nabla \mathcal{L}(\theta_t)
+$$
+
+where $g(\theta)$ is the Fubiny-Study metric tensor (aka Quantum Geometric Tensor), which is equivalent to the Quantum Fisher Information matrix $F(\theta)$ up to a constant factor $F(\theta)= 4 g(\theta)$. The Quantum Fisher Information can be written as the Hessian of the fidelity of a quantum state:
+
+$$
+  F_{i j}(\theta)=-\left.2 \frac{\partial}{\partial \theta_i} \frac{\partial}{\partial \theta_j}\left|\left\langle\psi\left(\theta^{\prime}\right) \mid \psi(\theta)\right\rangle\right|^2\right|_{{\theta}^{\prime}=\theta}
+$$
+
+However, computing the above expression is a costly operation scaling quadratically with the number of parameters in the variational quantum circuit. It is thus usual to use approximate methods when dealing with the QFI matrix. Qadence-Libs provides a SPSA-based implementation of the Quantum Natural Gradient[^3]. The [SPSA](https://www.jhuapl.edu/spsa/) (Simultaneous Perturbation Stochastic Approximation) algorithm is a well known finite differences-based algorithm. QNG-SPSA constructs an iterative approximation to the QFI matrix with a constant number of circuit evaluations that does not scale with the number of parameters. Although the SPSA algorithm outputs a rough approximation of the QFI matrix, the QNG-SPSA has been proven to work well while being a very efficient method due to the constant overhead in circuit evaluations (only 6 extra evaluations per iteration).
+
+In this tutorial, we use the QNG and QNG-SPSA optimizers with the Quantum Circuit Learning algorithm, a variational quantum algorithm which uses Quantum Neural Networks as universal function approximators.
+
+Keep in mind that only *circuit* parameters can be optimized with the QNG optimizer, since we can only calculate the QFI matrix of parameters contained in the circuit. If your model holds other trainable, non-circuit parameters, such as scaling or shifting of the input/output, another optimizer must be used for to optimize those parameters.
+```python exec="on" source="material-block" html="1" session="main"
+import torch
+from torch.utils.data import random_split
+import random
+import matplotlib.pyplot as plt
+
+from qadence import QuantumCircuit, QNN, FeatureParameter
+from qadence import kron, tag, hea, RX, Z, hamiltonian_factory
+
+from qadence_libs.qinfo_tools import QuantumNaturalGradient
+from qadence_libs.types import FisherApproximation
+```
+
+First, we prepare the Quantum Circuit Learning data. In this case we will fit a simple one-dimensional sin($x$) function:
+```python exec="on" source="material-block" html="1" session="main"
+# Ensure reproducibility
+seed = 0
+torch.manual_seed(seed)
+random.seed(seed)
+
+# Create dataset
+def qcl_training_data(
+    domain: tuple = (0, 2 * torch.pi), n_points: int = 200
+) -> tuple[torch.Tensor, torch.Tensor]:
+    start, end = domain
+
+    x_rand, _ = torch.sort(torch.DoubleTensor(n_points).uniform_(start, end))
+    y_rand = torch.sin(x_rand)
+
+    return x_rand, y_rand
+
+
+x, y = qcl_training_data()
+
+# random train/test split of the dataset
+train_subset, test_subset = random_split(x, [0.75, 0.25])
+train_ind = sorted(train_subset.indices)
+test_ind = sorted(test_subset.indices)
+
+x_train, y_train = x[train_ind], y[train_ind]
+x_test, y_test = x[test_ind], y[test_ind]
+```
+
+We now create the base Quantum Circuit that we will use with all the optimizers:
+```python exec="on" source="material-block" html="1" session="main"
+n_qubits = 3
+
+# create a simple feature map to encode the input data
+feature_param = FeatureParameter("phi")
+feature_map = kron(RX(i, feature_param) for i in range(n_qubits))
+feature_map = tag(feature_map, "feature_map")
+
+# create a digital-analog variational ansatz using Qadence convenience constructors
+ansatz = hea(n_qubits, depth=n_qubits)
+ansatz = tag(ansatz, "ansatz")
+
+# Observable
+observable = hamiltonian_factory(n_qubits, detuning= Z)
+```
+
+## Optimizers
+
+We will experiment with three different optimizers: ADAM, QNG and QNG-SPSA. To train a model with the different optimizers we will create a `QuantumModel` and reset the values of their variational parameters before each training loop so that all of them have the same starting point.
+
+```python exec="on" source="material-block" html="1" session="main"
+# Build circuit and model
+circuit = QuantumCircuit(n_qubits, feature_map, ansatz)
+model = QNN(circuit, [observable])
+
+# Loss function
+mse_loss = torch.nn.MSELoss()
+
+# Initial parameter values
+initial_params = torch.rand(model.num_vparams)
+```
+
+We can now train the model with the different corresponding optimizers:
+### ADAM
+```python exec="on" source="material-block" html="1" session="main"
+# Train with ADAM
+n_epochs_adam = 20
+lr_adam = 0.1
+
+model.reset_vparams(initial_params)
+optimizer = torch.optim.Adam(model.parameters(), lr=lr_adam)
+
+loss_adam = []
+for i in range(n_epochs_adam):
+    optimizer.zero_grad()
+    loss = mse_loss(model(values=x_train).squeeze(), y_train.squeeze())
+    loss_adam.append(float(loss))
+    loss.backward()
+    optimizer.step()
+```
+
+### QNG
+```python exec="on" source="material-block" html="1" session="main"
+# Train with QNG
+n_epochs_qng = 20
+lr_qng = 0.1
+
+model.reset_vparams(initial_params)
+optimizer = QuantumNaturalGradient(
+    model.parameters(),
+    lr=lr_qng,
+    approximation=FisherApproximation.EXACT,
+    model=model,
+    beta=0.1,
+)
+
+loss_qng = []
+for i in range(n_epochs_qng):
+    optimizer.zero_grad()
+    loss = mse_loss(model(values=x_train).squeeze(), y_train.squeeze())
+    loss_qng.append(float(loss))
+    loss.backward()
+    optimizer.step()
+```
+
+### QNG-SPSA
+```python exec="on" source="material-block" html="1" session="main"
+# Train with QNG-SPSA
+n_epochs_qng_spsa = 20
+lr_qng_spsa = 0.01
+
+model.reset_vparams(initial_params)
+optimizer = QuantumNaturalGradient(
+    model.parameters(),
+    lr=lr_qng_spsa,
+    approximation=FisherApproximation.SPSA,
+    model=model,
+    beta=0.1,
+    epsilon=0.01,
+)
+
+loss_qng_spsa = []
+for i in range(n_epochs_qng_spsa):
+    optimizer.zero_grad()
+    loss = mse_loss(model(values=x_train).squeeze(), y_train.squeeze())
+    loss_qng_spsa.append(float(loss))
+    loss.backward()
+    optimizer.step()
+```
+
+## Plotting
+
+We now plot the losses corresponding to each of the optimizers:
+```python exec="on" source="material-block" html="1" session="main"
+# Plot losses
+fig, _ = plt.subplots()
+plt.plot(range(n_epochs_adam), loss_adam, label="Adam optimizer")
+plt.plot(range(n_epochs_qng), loss_qng, label="QNG optimizer")
+plt.plot(range(n_epochs_qng_spsa), loss_qng_spsa, label="QNG-SPSA optimizer")
+plt.legend()
+plt.xlabel("Training epochs")
+plt.ylabel("Loss")
+
+from docs import docsutils # markdown-exec: hide
+print(docsutils.fig_to_html(plt.gcf())) # markdown-exec: hide
+```
+
+## References
+[^1]: [Meyer J., Information in Noisy Intermediate-Scale Quantum Applications, _Quantum 5, 539 (2021)_](https://quantum-journal.org/papers/q-2021-09-09-539/)
+[^2]: [Stokes _et al._, Quantum Natural Gradient, _Quantum 4, 269 (2020)._](https://quantum-journal.org/papers/q-2020-05-25-269/)
+[^3]: [Gacon _et al._, Simultaneous Perturbation Stochastic Approximation of the Quantum Fisher Information, _Quantum 5, 567 (2021)._](https://quantum-journal.org/papers/q-2021-10-20-567/)
diff --git a/mkdocs.yml b/mkdocs.yml
index bf5fc03..4f6e3e1 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -4,7 +4,8 @@ repo_name: "qadence_libs"
 
 nav:
   - Overview: index.md
-  - Sample page: sample_page.md
+  - Quantum information tools:
+    - Quantum Natural Gradient: qinfo_tools/qng.md
 
 theme:
   name: material
@@ -34,6 +35,7 @@ theme:
       name: Switch to light mode
 
 markdown_extensions:
+- footnotes
 - admonition  # for notes
 - pymdownx.arithmatex: # for mathjax
     generic: true
diff --git a/pyproject.toml b/pyproject.toml
index 31e6aed..25db6cb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,11 +13,12 @@ authors = [
     { name = "Vincent Elfving", email = "vincent.elfving@pasqal.com" },
     { name = "Dominik Seitz", email = "dominik.seitz@pasqal.com" },
     { name = "Niklas Heim", email = "niklas.heim@pasqal.com" },
+    { name = "Ignacio Fernández Graña", email = "ignacio.fernandez-grana@pasqal.com" },
 ]
 requires-python = ">=3.9,<3.12"
 license = {text = "Apache 2.0"}
 keywords = ["quantum"]
-version = "0.1.1"
+version = "0.1.2"
 classifiers=[
     "License :: OSI Approved :: Apache Software License",
     "Programming Language :: Python",
diff --git a/qadence_libs/__init__.py b/qadence_libs/__init__.py
index fc3af24..6551fe3 100644
--- a/qadence_libs/__init__.py
+++ b/qadence_libs/__init__.py
@@ -16,6 +16,7 @@
 
 list_of_submodules = [
     ".constructors",
+    ".qinfo_tools",
 ]
 
 __all__ = []
diff --git a/qadence_libs/qinfo_tools/__init__.py b/qadence_libs/qinfo_tools/__init__.py
new file mode 100644
index 0000000..0fa4293
--- /dev/null
+++ b/qadence_libs/qinfo_tools/__init__.py
@@ -0,0 +1,11 @@
+from __future__ import annotations
+
+from .qfi import get_quantum_fisher, get_quantum_fisher_spsa
+from .qng import QuantumNaturalGradient
+
+# Modules to be automatically added to the qadence namespace
+__all__ = [
+    "QuantumNaturalGradient",
+    "get_quantum_fisher",
+    "get_quantum_fisher_spsa",
+]
diff --git a/qadence_libs/qinfo_tools/qfi.py b/qadence_libs/qinfo_tools/qfi.py
new file mode 100644
index 0000000..2bf9c6a
--- /dev/null
+++ b/qadence_libs/qinfo_tools/qfi.py
@@ -0,0 +1,150 @@
+from __future__ import annotations
+
+import torch
+from qadence import Overlap, QuantumCircuit
+from qadence.blocks import parameters, primitive_blocks
+from qadence.types import BackendName, DiffMode, OverlapMethod
+from torch import Tensor
+
+from qadence_libs.qinfo_tools.spsa import spsa_2gradient_step
+from qadence_libs.qinfo_tools.utils import hessian
+
+
+def _positive_semidefinite_sqrt(A: Tensor) -> Tensor:
+    """Computes the square root of a real positive semi-definite matrix.
+
+    Code taken from https://github.com/pytorch/pytorch/issues/25481#issuecomment-1032789228
+    """
+    L, Q = torch.linalg.eigh(A)
+    zero = torch.zeros((), device=L.device, dtype=L.dtype)
+    threshold = L.max(-1).values * L.size(-1) * torch.finfo(L.dtype).eps
+    L = L.where(L > threshold.unsqueeze(-1), zero)  # zero out small components
+    return (Q * L.sqrt().unsqueeze(-2)) @ Q.mH
+
+
+def _set_circuit_vparams(circuit: QuantumCircuit, vparams_dict: dict[str, Tensor]) -> None:
+    """Sets the variational parameter values of the circuit."""
+    blocks = primitive_blocks(circuit.block)
+    for block in blocks:
+        params = parameters(block)
+        for p in params:
+            if p.trainable:
+                p.value = vparams_dict[p.name]
+
+
+def _get_fm_dict(circuit: QuantumCircuit) -> dict[str, Tensor]:
+    """Returns a dictionary holding the FM parameters of the circuit."""
+    fm_dict = {}
+    blocks = primitive_blocks(circuit.block)
+    for block in blocks:
+        params = parameters(block)
+        for p in params:
+            if not p.trainable:
+                fm_dict[p.name] = torch.tensor([p.value])
+    return fm_dict
+
+
+def get_quantum_fisher(
+    circuit: QuantumCircuit,
+    vparams_dict: dict[str, Tensor] = dict(),
+    fm_dict: dict[str, Tensor] = dict(),
+    backend: BackendName = BackendName.PYQTORCH,
+    overlap_method: OverlapMethod = OverlapMethod.EXACT,
+    diff_mode: DiffMode = DiffMode.AD,
+) -> Tensor:
+    """Returns the exact Quantum Fisher Information (QFI) matrix.
+
+    Args:
+        circuit (QuantumCircuit): The quantum circuit.
+        vparams_dict (dict[str, Tensor]):
+            Dictionary holding the values of the variational parameters of the circuit.
+        fm_dict (dict[str, Tensor]):
+            Dictionary holding the values of the Feature Map parameters of the circuit.
+        overlap_method (OverlapMethod, optional): Defaults to OverlapMethod.EXACT.
+        diff_mode (DiffMode, optional): Defaults to DiffMode.ad.
+
+    Returns:
+        Tensor:
+            The exact QFI matrix of the circuit.
+    """
+    # The FM dictionary is needed for the forward method in Overlap()
+    if not fm_dict:
+        fm_dict = _get_fm_dict(circuit)
+
+    if vparams_dict:
+        _set_circuit_vparams(circuit, vparams_dict)
+
+    overlap_model = Overlap(
+        circuit,
+        circuit,
+        backend=backend,
+        diff_mode=diff_mode,
+        method=overlap_method,
+    )
+    overlap = overlap_model(bra_param_values=fm_dict, ket_param_values=fm_dict)
+
+    # Retrieve variational parameters of the overlap model
+    # Importantly, the vparams of the overlap model are the vparams of the bra tensor,
+    # Which means if we differentiate wrt vparams we are differentiating only wrt the
+    # parameters in the bra and not in the ket
+    vparams = [v for v in overlap_model._params.values() if v.requires_grad]
+    return -2 * hessian(overlap, vparams)
+
+
+def get_quantum_fisher_spsa(
+    circuit: QuantumCircuit,
+    iteration: int,
+    vparams_dict: dict[str, Tensor] = dict(),
+    fm_dict: dict[str, Tensor] = dict(),
+    previous_qfi_estimator: Tensor | None = None,
+    epsilon: float = 10e-3,
+    beta: float = 10e-2,
+    backend: BackendName = BackendName.PYQTORCH,
+    overlap_method: OverlapMethod = OverlapMethod.EXACT,
+    diff_mode: DiffMode = DiffMode.AD,
+) -> tuple[Tensor, Tensor]:
+    """Returns the a SPSA-approximation of the Quantum Fisher Information (QFI) matrix.
+
+    Args:
+        circuit (QuantumCircuit): The quantum circuit.
+        iteration (int): Current iteration in the SPSA iterative loop.
+        vparams_values (dict[str, Tensor]):
+            Dictionary holding the values of the variational parameters of the circuit.
+        fm_dict (dict[str, Tensor]):
+            Dictionary holding the values of the Feature Map parameters of the circuit.
+        overla p_method (OverlapMethod, optional): Defaults to OverlapMethod.EXACT.
+        diff_mode (DiffMode, optional): Defaults to DiffMode.ad.
+
+    Returns:
+        tuple[Tensor, Tensor]:
+            Tuple containing the QFI matrix and its positive semi-definite estimator.
+    """
+    # Retrieve feature parameters if they are not given as inputs
+    # The FM dictionary is needed for the forward run of the Overlap() model
+    if not fm_dict:
+        fm_dict = _get_fm_dict(circuit)
+
+    ovrlp_model = Overlap(
+        circuit,
+        circuit,
+        backend=backend,
+        diff_mode=diff_mode,
+        method=overlap_method,
+    )
+
+    # Calculate the QFI matrix
+    qfi_mat = -2 * spsa_2gradient_step(ovrlp_model, epsilon, fm_dict, vparams_dict)
+
+    # Calculate the QFI estimator from the old estimator of qfi_mat
+    if iteration == 0:
+        qfi_mat_estimator = qfi_mat
+    else:
+        a_k = 1 / (1 + iteration)
+        qfi_mat_estimator = a_k * (iteration * previous_qfi_estimator + qfi_mat)  # type: ignore
+
+    # Get the positive-semidefinite version of the matrix for the update rule in QNG
+    qfi_mat_positive_sd = _positive_semidefinite_sqrt(qfi_mat_estimator @ qfi_mat_estimator)
+    qfi_mat_positive_sd = qfi_mat_positive_sd + beta * torch.eye(ovrlp_model.num_vparams)
+    qfi_mat_positive_sd = qfi_mat_positive_sd / (1 + beta)  # regularization
+
+    return qfi_mat_estimator, qfi_mat_positive_sd
diff --git a/qadence_libs/qinfo_tools/qng.py b/qadence_libs/qinfo_tools/qng.py
new file mode 100644
index 0000000..8e731c1
--- /dev/null
+++ b/qadence_libs/qinfo_tools/qng.py
@@ -0,0 +1,180 @@
+from __future__ import annotations
+
+from typing import Callable, Sequence
+
+import torch
+from qadence import QuantumCircuit, QuantumModel
+from qadence.logger import get_logger
+from qadence.ml_tools.models import TransformedModule
+from torch.optim.optimizer import Optimizer, required
+
+from qadence_libs.qinfo_tools.qfi import get_quantum_fisher, get_quantum_fisher_spsa
+from qadence_libs.types import FisherApproximation
+
+logger = get_logger(__name__)
+
+
+class QuantumNaturalGradient(Optimizer):
+    """Implements the Quantum Natural Gradient Algorithm.
+
+    There are currently two variants of the algorithm implemented: exact QNG and
+    the SPSA approximation.
+
+    WARNING: The exact QNG optimizer is very inefficient both in time and memory as
+    it calculates the exact Quantum Fisher Information of the circuit at every
+    iteration. Therefore, it is not meant to be run with medium to large circuits.
+    Other approximations such as the SPSA are much more efficient while retaining
+    good performance.
+    """
+
+    def __init__(
+        self,
+        params: Sequence,
+        model: QuantumModel = required,
+        lr: float = required,
+        approximation: FisherApproximation | str = FisherApproximation.SPSA,
+        beta: float = 10e-3,
+        epsilon: float = 10e-2,
+    ):
+        """
+        Args:
+
+            params (tuple | torch.Tensor): Variational parameters to be updated
+            model (QuantumModel):
+                Model to be optimized. The optimizers needs to access its quantum circuit
+                to compute the QFI matrix.
+            lr (float): Learning rate.
+            approximation (FisherApproximation):
+                Approximation used to compute the QFI matrix. Defaults to FisherApproximation.SPSA
+            beta (float):
+                Shift applied to the QFI matrix before inversion to ensure numerical stability.
+                Defaults to 10e-3.
+            epsilon (float):
+                Finite difference applied when computing the SPSA derivatives. Defaults to 10e-2.
+        """
+
+        if 0.0 > lr:
+            raise ValueError(f"Invalid learning rate: {lr}")
+        if 0.0 >= beta:
+            raise ValueError(f"Invalid beta value: {beta}")
+        if 0.0 > epsilon:
+            raise ValueError(f"Invalid epsilon value: {epsilon}")
+
+        if isinstance(model, TransformedModule):
+            logger.warning(
+                "The model is of type '<class TransformedModule>. "
+                "Keep in mind that the QNG optimizer can only optimize circuit "
+                "parameters. Input and output shifting/scaling parameters will not be optimized."
+            )
+            # Retrieve the quantum model from the TransformedModule
+            model = model.model
+        if not isinstance(model, QuantumModel):
+            raise TypeError(
+                "The model should be an instance of '<class QuantumModel>' "
+                f"or '<class TransformedModule>'. Got {type(model)}."
+            )
+
+        self.param_dict = model.vparams
+        self.circuit = model._circuit.abstract
+        if not isinstance(self.circuit, QuantumCircuit):
+            raise TypeError(
+                "The circuit should be an instance of '<class QuantumCircuit>'."
+                "Got {type(self.circuit)}"
+            )
+
+        defaults = dict(
+            model=model,
+            lr=lr,
+            approximation=approximation,
+            beta=beta,
+            epsilon=epsilon,
+        )
+        super().__init__(params, defaults)
+
+        if approximation == FisherApproximation.SPSA:
+            state = self.state["state"]
+            state.setdefault("iter", 0)
+            state.setdefault("qfi_estimator", None)
+
+    def step(self, closure: Callable | None = None) -> torch.Tensor:
+        """Performs a single optimization step of the QNG algorithm.
+
+        Arguments:
+            closure (callable, optional): A closure that reevaluates the model
+                and returns the loss.
+        """
+        loss = None
+        if closure is not None:
+            loss = closure()
+
+        for group in self.param_groups:
+            vparams_values = [p for p in group["params"] if p.requires_grad]
+
+            # Build the parameter dictionary
+            # We rely on the `vparam()` method in `QuantumModel` and the
+            # `parameters()` in `nn.Module` to give the same param ordering.
+            # We test for this in `test_qng.py`.
+            vparams_dict = dict(zip(self.param_dict.keys(), vparams_values))
+
+            approximation = group["approximation"]
+            grad_vec = torch.tensor([v.grad.data for v in vparams_values])
+            if approximation == FisherApproximation.EXACT:
+                # Calculate the EXACT metric tensor
+                metric_tensor = 0.25 * get_quantum_fisher(
+                    self.circuit,
+                    vparams_dict=vparams_dict,
+                )
+
+                with torch.no_grad():
+                    # Apply a finite shift to the metric tensor to avoid numerical
+                    # stability issues when solving the least squares problem
+                    metric_tensor = metric_tensor + group["beta"] * torch.eye(len(grad_vec))
+
+                    # Get transformed gradient vector solving the least squares problem
+                    transf_grad = torch.linalg.lstsq(
+                        metric_tensor,
+                        grad_vec,
+                        driver="gelsd",
+                    ).solution
+
+                    # Update parameters
+                    for i, p in enumerate(vparams_values):
+                        p.data.add_(transf_grad[i], alpha=-group["lr"])
+
+            elif approximation == FisherApproximation.SPSA:
+                state = self.state["state"]
+                with torch.no_grad():
+                    # Get estimation of the QFI matrix
+                    qfi_estimator, qfi_mat_positive_sd = get_quantum_fisher_spsa(
+                        circuit=self.circuit,
+                        iteration=state["iter"],
+                        vparams_dict=vparams_dict,
+                        previous_qfi_estimator=state["qfi_estimator"],
+                        epsilon=group["epsilon"],
+                        beta=group["beta"],
+                    )
+
+                    # Get transformed gradient vector solving the least squares problem
+                    transf_grad = torch.linalg.lstsq(
+                        0.25 * qfi_mat_positive_sd,
+                        grad_vec,
+                        driver="gelsd",
+                    ).solution
+
+                    # Update parameters
+                    for i, p in enumerate(vparams_values):
+                        if p.grad is None:
+                            continue
+                        p.data.add_(transf_grad[i], alpha=-group["lr"])
+
+                state["iter"] += 1
+                state["qfi_estimator"] = qfi_estimator
+
+            else:
+                raise NotImplementedError(
+                    f"Approximation {approximation} of the QNG optimizer "
+                    "is not implemented. Choose an item from the "
+                    f"FisherApproximation enum: {FisherApproximation.list()}."
+                )
+
+        return loss
diff --git a/qadence_libs/qinfo_tools/spsa.py b/qadence_libs/qinfo_tools/spsa.py
new file mode 100644
index 0000000..c676d09
--- /dev/null
+++ b/qadence_libs/qinfo_tools/spsa.py
@@ -0,0 +1,122 @@
+from __future__ import annotations
+
+import torch
+from qadence import Overlap
+from torch import Tensor
+
+
+def _create_random_direction(size: int) -> Tensor:
+    """
+    Creates a torch Tensor with `size` elements randomly drawn from {-1,+1}.
+
+    Args:
+        size (int): Size of the vector
+    """
+    x = torch.bernoulli(0.5 * torch.ones(size)).reshape(size, 1)
+    x[x == 0] = -1
+    return x
+
+
+def _shifted_overlap(
+    model: Overlap,
+    shift: Tensor,
+    fm_dict: dict[str, Tensor],
+    vparams_dict: dict[str, Tensor],
+) -> Tensor:
+    """
+    Forward method of the Overlap model with shifted values of the ket variational parameters.
+
+    Args:
+        model (Overlap): Overlap model
+        shift (float): Quantity of the shift
+        fm_dict (dict[str, Tensor]): Feature map dictionary
+        vparams_dict (dict[str, Tensor]): Variational parameter dictionary
+    """
+    shifted_vparams_dict = {k: (v + s) for (k, v), s in zip(vparams_dict.items(), shift)}
+
+    ovrlp_shifted = model(
+        bra_param_values=fm_dict | vparams_dict,
+        ket_param_values=fm_dict | shifted_vparams_dict,
+    )
+    return ovrlp_shifted
+
+
+def spsa_gradient_step(
+    model: Overlap,
+    epsilon: float,
+    fm_dict: dict[str, Tensor],
+    vparams_dict: dict[str, Tensor] = dict(),
+) -> Tensor:
+    """Single step of the first order SPSA gradient.
+
+    Calculates a single step of the SPSA algorithm to calculate
+    the first order gradient of the given Overlap model.
+
+    Args:
+        model (Overlap): Overlap model
+        epsilon (float): Finite step size
+        fm_dict (dict[str, Tensor]): Feature map dictionary
+        vparams_dict (dict[str, Tensor]): Variational parameters dictionary
+    """
+    if not vparams_dict:
+        vparams_dict = {k: v for (k, v) in model._params.items() if v.requires_grad}
+
+    # Create random direction
+    random_direction = _create_random_direction(size=model.num_vparams)
+
+    # Shift ket variational parameters
+    shift = epsilon * random_direction
+
+    # Overlaps with the shifted parameters
+    ovrlp_shifted_plus = _shifted_overlap(model, shift, fm_dict, vparams_dict)
+    ovrlp_shifted_minus = _shifted_overlap(model, -shift, fm_dict, vparams_dict)
+
+    return random_direction * (ovrlp_shifted_plus - ovrlp_shifted_minus) / (2 * epsilon)
+
+
+def spsa_2gradient_step(
+    model: Overlap,
+    epsilon: float,
+    fm_dict: dict[str, Tensor],
+    vparams_dict: dict[str, Tensor] = dict(),
+) -> Tensor:
+    """Single step of the second order SPSA gradient.
+
+    Calculates a single step of the SPSA algorithm to calculate
+    the second order gradient of the given Overlap model.
+
+    TODO: implement recursively using the first order function
+
+    Args:
+        model (Overlap): Overlap model
+        epsilon (float): Finite step size
+        fm_dict (dict[str, Tensor]): Feature map dictionary
+        vparams_dict (dict[str, Tensor]): Variational parameters dictionary
+    """
+    if not vparams_dict:
+        vparams_dict = {k: v for (k, v) in model._params.items() if v.requires_grad}
+
+    # Create random directions
+    rand_dir1 = _create_random_direction(size=model.num_vparams)
+    rand_dir2 = _create_random_direction(size=model.num_vparams)
+
+    # Overlaps with the shifted parameters
+    shift_p1 = epsilon * rand_dir1
+    ovrlp_shifted_p1 = _shifted_overlap(model, shift_p1, fm_dict, vparams_dict)
+    shift_p1p2 = epsilon * (rand_dir1 + rand_dir2)
+    ovrlp_shifted_p1p2 = _shifted_overlap(model, shift_p1p2, fm_dict, vparams_dict)
+    shift_m1 = -epsilon * rand_dir1
+    ovrlp_shifted_m1 = _shifted_overlap(model, shift_m1, fm_dict, vparams_dict)
+    shift_m1p2 = epsilon * (-rand_dir1 + rand_dir2)
+    ovrlp_shifted_m1p2 = _shifted_overlap(model, shift_m1p2, fm_dict, vparams_dict)
+
+    # Prefactor
+    delta_F = ovrlp_shifted_p1p2 - ovrlp_shifted_p1 - ovrlp_shifted_m1p2 + ovrlp_shifted_m1
+
+    # Hessian
+    dir_product = torch.matmul(rand_dir1, rand_dir2.transpose(0, 1)) + torch.matmul(
+        rand_dir2, rand_dir1.transpose(0, 1)
+    )
+    hess = (4 * (epsilon**2)) ** (-1) * delta_F * dir_product
+
+    return hess
diff --git a/qadence_libs/qinfo_tools/utils.py b/qadence_libs/qinfo_tools/utils.py
new file mode 100644
index 0000000..431d8f0
--- /dev/null
+++ b/qadence_libs/qinfo_tools/utils.py
@@ -0,0 +1,42 @@
+from __future__ import annotations
+
+import torch
+from torch import Tensor
+from torch.autograd import grad
+
+
+def hessian(output: Tensor, inputs: list[Tensor]) -> Tensor:
+    """Calculates the Hessian of a given output vector wrt the inputs.
+
+    TODO: Use autograd built-in functions for a more efficient implementation,
+    but grad tree
+    is broken by the Overlap method
+
+    Args:
+        output (Tensor): Output vector
+        inputs (list[Tensor]): List of input parameters
+    """
+
+    jacobian = grad(
+        output,
+        inputs,
+        torch.ones_like(output),
+        create_graph=True,
+        allow_unused=True,
+    )
+
+    n_params = len(inputs)
+    hess = torch.empty((n_params, n_params))
+    for i in range(n_params):
+        jacobian_var = jacobian[i]
+        ovrlp_grad2 = grad(
+            jacobian_var,
+            inputs,
+            torch.ones_like(jacobian_var),
+            create_graph=True,
+            allow_unused=True,
+        )
+        for j in range(n_params):
+            hess[i, j] = ovrlp_grad2[j]
+
+    return hess
diff --git a/qadence_libs/types.py b/qadence_libs/types.py
index f5b91ca..ebc27a0 100644
--- a/qadence_libs/types.py
+++ b/qadence_libs/types.py
@@ -2,7 +2,7 @@
 
 from qadence.types import StrEnum
 
-__all__ = ["BasisSet", "ReuploadScaling"]
+__all__ = ["BasisSet", "ReuploadScaling", "FisherApproximation"]
 
 
 class BasisSet(StrEnum):
@@ -23,3 +23,12 @@ class ReuploadScaling(StrEnum):
     """Linearly increasing scaling."""
     EXP = "Exponential"
     """Exponentially increasing scaling."""
+
+
+class FisherApproximation(StrEnum):
+    """Approximation to calculate the Quantum Fisher Information (QFI)."""
+
+    EXACT = "Exact"
+    """No approximation, computes the exact QFI."""
+    SPSA = "SPSA"
+    """Approximation via the SPSA algorithm."""
diff --git a/tests/conftest.py b/tests/conftest.py
index 3014d28..551d010 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,2 +1,45 @@
 # use this file for configuring test fixtures and
 # functions common to every test
+from __future__ import annotations
+
+import torch
+from pytest import fixture
+from qadence import QNN, BasisSet, FeatureParameter, QuantumCircuit
+from qadence.constructors import feature_map, hamiltonian_factory, hea
+from qadence.operations import RX, RY, Z
+
+
+@fixture
+def textbook_qfi_model() -> QNN:
+    n_qubits, n_layers = [2, 2]
+    feature_param = FeatureParameter("phi", value=0)
+    fm = feature_map(n_qubits, range(n_qubits), param=feature_param, fm_type=BasisSet.FOURIER)
+    ansatz = hea(
+        n_qubits,
+        n_layers,
+        param_prefix="theta",
+        operations=[RX],
+        periodic=True,
+    )
+    circuit = QuantumCircuit(n_qubits, ansatz, fm)
+    obs = hamiltonian_factory(n_qubits, detuning=Z)
+    model = QNN(circuit, [obs])
+    model.reset_vparams(torch.zeros(model.num_vparams))
+    return model
+
+
+@fixture
+def basic_optim_model() -> QNN:
+    n_qubits, n_layers = [2, 2]
+    fm = feature_map(n_qubits, range(n_qubits), param="phi", fm_type=BasisSet.FOURIER)
+    ansatz = hea(
+        n_qubits,
+        depth=n_layers,
+        param_prefix="theta",
+        operations=[RX, RY],
+        periodic=True,
+    )
+    circuit = QuantumCircuit(n_qubits, fm, ansatz)
+    obs = hamiltonian_factory(n_qubits, detuning=Z)
+    model = QNN(circuit, [obs])
+    return model
diff --git a/tests/qinfo_tools/test_qfi.py b/tests/qinfo_tools/test_qfi.py
new file mode 100644
index 0000000..7741a4c
--- /dev/null
+++ b/tests/qinfo_tools/test_qfi.py
@@ -0,0 +1,84 @@
+from __future__ import annotations
+
+import random
+
+import numpy as np
+import torch
+from qadence import QNN
+from torch import Size, allclose
+
+from qadence_libs.qinfo_tools import get_quantum_fisher, get_quantum_fisher_spsa
+from qadence_libs.qinfo_tools.qfi import _positive_semidefinite_sqrt
+
+SEED = 42
+torch.manual_seed(SEED)
+np.random.seed(SEED)
+random.seed(SEED)
+
+
+# Textbook QFI matrix for textbook_qfi_model with 2 qubits and 2 layers
+textbook_qfi = torch.Tensor(
+    [
+        [1.0, 0.0, 0.0, 0.0],
+        [0.0, 1.0, 0.0, 1.0],
+        [0.0, 0.0, 1.0, 0.0],
+        [0.0, 1.0, 0.0, 1.0],
+    ]
+)
+
+
+def test_positive_semidefinite_sqrt() -> None:
+    # Construct a positive semidefinite matrix
+    A = textbook_qfi @ textbook_qfi
+    matrix_sqrt = _positive_semidefinite_sqrt(A)
+    assert torch.allclose(matrix_sqrt @ matrix_sqrt, A)
+
+
+def test_qfi_exact_no_params(textbook_qfi_model: QNN) -> None:
+    # As long as FM parameters and variational parameters are independent,
+    # the QFI matrix should be independent of the value of the FM params.
+    circuit = textbook_qfi_model._circuit.abstract
+    qfi_mat_exact = get_quantum_fisher(circuit)
+    n_vparams = textbook_qfi_model.num_vparams
+    assert qfi_mat_exact.shape == Size([n_vparams, n_vparams])
+    assert allclose(textbook_qfi, qfi_mat_exact)
+
+
+def test_qfi_exact_with_params(textbook_qfi_model: QNN) -> None:
+    circuit = textbook_qfi_model._circuit.abstract
+    vparams_dict = textbook_qfi_model.vparams
+    fm_dict = {"phi": torch.Tensor([0])}
+    qfi_mat_exact = get_quantum_fisher(circuit, vparams_dict=vparams_dict, fm_dict=fm_dict)
+    n_vparams = len(vparams_dict)
+    assert qfi_mat_exact.shape == Size([n_vparams, n_vparams])
+    assert allclose(textbook_qfi, qfi_mat_exact)
+
+
+def test_qfi_spsa(textbook_qfi_model: QNN) -> None:
+    circuit = textbook_qfi_model._circuit.abstract
+    vparams_dict = textbook_qfi_model.vparams
+    fm_dict = {"phi": torch.Tensor([0])}
+    qfi_mat_spsa = None
+    for iteration in range(40):
+        qfi_mat_spsa, qfi_positive_sd = get_quantum_fisher_spsa(
+            circuit,
+            iteration,
+            vparams_dict=vparams_dict,
+            fm_dict=fm_dict,
+            previous_qfi_estimator=qfi_mat_spsa,
+            beta=0.01,
+            epsilon=0.01,
+        )
+        if iteration == 0:
+            initial_nrm = torch.linalg.norm(textbook_qfi - qfi_mat_spsa)
+
+    final_nrm = torch.linalg.norm(textbook_qfi - qfi_mat_spsa)
+    assert final_nrm < initial_nrm
+
+    n_vparams = len(vparams_dict)
+    assert qfi_mat_spsa.shape == Size([n_vparams, n_vparams])  # type: ignore
+    assert qfi_positive_sd.shape == Size([n_vparams, n_vparams])
+
+    # Check that qfi_positive_sd is positive semi-definite
+    eigvals = torch.linalg.eigh(qfi_positive_sd)[0]
+    assert torch.all(eigvals >= 0)
diff --git a/tests/qinfo_tools/test_qng.py b/tests/qinfo_tools/test_qng.py
new file mode 100644
index 0000000..f6fcc18
--- /dev/null
+++ b/tests/qinfo_tools/test_qng.py
@@ -0,0 +1,92 @@
+from __future__ import annotations
+
+import random
+
+import numpy as np
+import pytest
+import torch
+from qadence import QuantumCircuit
+from torch import Tensor
+
+from qadence_libs.qinfo_tools import QuantumNaturalGradient
+from qadence_libs.types import FisherApproximation
+
+SEED = 42
+torch.manual_seed(SEED)
+np.random.seed(SEED)
+random.seed(SEED)
+
+
+def quadratic_dataset(samples: int) -> tuple[Tensor, Tensor]:
+    x_train = torch.rand(samples)
+    return x_train, x_train**2
+
+
+def sin_dataset(samples: int) -> tuple[Tensor, Tensor]:
+    x_train = torch.rand(samples)
+    return x_train, torch.sin(x_train)
+
+
+# Optimizers config [optim, config, iters]
+OPTIMIZERS_CONFIG = [
+    (
+        {
+            "lr": 0.1,
+            "approximation": FisherApproximation.EXACT,
+            "beta": 0.01,
+        },
+        20,
+    ),
+    (
+        {
+            "lr": 0.01,
+            "approximation": FisherApproximation.SPSA,
+            "beta": 0.1,
+            "epsilon": 0.01,
+        },
+        20,
+    ),
+]
+samples = 100
+DATASETS = [quadratic_dataset(samples), sin_dataset(samples)]
+
+
+def test_parameter_ordering(basic_optim_model: QuantumCircuit) -> None:
+    model = basic_optim_model
+    model.reset_vparams(torch.rand((len(model.vparams))))
+    vparams_torch = [p.data for p in model.parameters() if p.requires_grad]
+    vparams_qadence = [v.data for v in model.vparams.values()]
+    assert len(vparams_torch) == len(vparams_qadence)
+    msg = (
+        "The ordering of the output of the `vparams()` method in QuantumModel"
+        + "and the `parameters()` method in Torch is not consistent"
+        + "for variational parameters."
+    )
+    assert vparams_torch == vparams_qadence, msg
+
+
+@pytest.mark.parametrize("dataset", DATASETS)
+@pytest.mark.parametrize("optim_config", OPTIMIZERS_CONFIG)
+def test_optims(
+    dataset: tuple[Tensor, Tensor], optim_config: dict, basic_optim_model: QuantumCircuit
+) -> None:
+    model = basic_optim_model
+    model.reset_vparams(torch.ones((len(model.vparams))))
+
+    config, iters = optim_config
+    x_train, y_train = dataset
+    mse_loss = torch.nn.MSELoss()
+    vparams = [p for p in model.parameters() if p.requires_grad]
+    optimizer = QuantumNaturalGradient(params=vparams, model=model, **config)
+    initial_loss = mse_loss(model(x_train).squeeze(), y_train.squeeze())
+    for _ in range(iters):
+        optimizer.zero_grad()
+        loss = mse_loss(model(values=x_train).squeeze(), y_train.squeeze())
+        loss.backward()
+        optimizer.step()
+
+    assert initial_loss > 2.0 * loss
+
+    if config["approximation"] == FisherApproximation.SPSA:
+        assert optimizer.state["state"]["iter"] == iters
+        assert optimizer.state["state"]["qfi_estimator"] is not None
diff --git a/tests/qinfo_tools/test_spsa.py b/tests/qinfo_tools/test_spsa.py
new file mode 100644
index 0000000..ba8b3bc
--- /dev/null
+++ b/tests/qinfo_tools/test_spsa.py
@@ -0,0 +1,49 @@
+from __future__ import annotations
+
+import random
+
+import numpy as np
+import pytest
+import torch
+from qadence import QNN, Overlap
+from torch import Size
+
+from qadence_libs.qinfo_tools.spsa import _shifted_overlap, spsa_2gradient_step
+
+SEED = 42
+torch.manual_seed(SEED)
+np.random.seed(SEED)
+random.seed(SEED)
+
+
+@pytest.mark.parametrize("shift", [0.0, 0.1, 0.5])
+@pytest.mark.parametrize("phi", [0.0, 0.1])
+def test_shifted_overlap(shift: float, phi: float, textbook_qfi_model: QNN) -> None:
+    circuit = textbook_qfi_model._circuit.abstract
+    fm_dict = {"phi": torch.Tensor([phi])}
+
+    ovrlp_model = Overlap(circuit, circuit)
+    vparams_dict = {k: v for (k, v) in ovrlp_model._params.items() if v.requires_grad}
+
+    shift_tensor = shift * torch.ones(len(vparams_dict))
+    ovrlp = _shifted_overlap(
+        ovrlp_model,
+        shift_tensor,
+        fm_dict,
+        vparams_dict,
+    )
+    if shift == 0.0:
+        assert torch.isclose(ovrlp, torch.ones_like(ovrlp))
+    if shift == 0.1:
+        assert not torch.isclose(ovrlp, torch.ones_like(ovrlp))
+
+
+@pytest.mark.parametrize("epsilon", [0.01, 0.001])
+def test_spsa_2gradient(epsilon: float, textbook_qfi_model: QNN) -> None:
+    circuit = textbook_qfi_model._circuit.abstract
+    fm_dict = {"phi": torch.Tensor([0.0])}
+    ovrlp_model = Overlap(circuit, circuit)
+    hess_spsa = spsa_2gradient_step(ovrlp_model, epsilon, fm_dict)
+
+    assert hess_spsa.shape == Size([ovrlp_model.num_vparams, ovrlp_model.num_vparams])
+    assert torch.all(torch.isreal(hess_spsa)), "Hessian matrix is not real"