From f4d26921ab55612c74edb9c888d177576c28786f Mon Sep 17 00:00:00 2001
From: Hao Song <nuacesh@gmail.com>
Date: Fri, 1 Apr 2022 13:32:09 +0100
Subject: [PATCH 01/14] Add support for linear-time MMD estimator.

---
 alibi_detect/cd/base.py                   |  2 +
 alibi_detect/cd/mmd.py                    |  3 +
 alibi_detect/cd/pytorch/mmd.py            | 34 +++++++----
 alibi_detect/cd/tensorflow/mmd.py         | 31 +++++++---
 alibi_detect/utils/pytorch/distance.py    | 70 +++++++++++++++++++++-
 alibi_detect/utils/pytorch/kernels.py     | 17 ++++--
 alibi_detect/utils/tensorflow/distance.py | 71 ++++++++++++++++++++++-
 alibi_detect/utils/tensorflow/kernels.py  | 13 ++++-
 8 files changed, 213 insertions(+), 28 deletions(-)

diff --git a/alibi_detect/cd/base.py b/alibi_detect/cd/base.py
index 43b435464..9f391a48d 100644
--- a/alibi_detect/cd/base.py
+++ b/alibi_detect/cd/base.py
@@ -452,6 +452,7 @@ def __init__(
             self,
             x_ref: Union[np.ndarray, list],
             p_val: float = .05,
+            estimator: str = 'quad',
             preprocess_x_ref: bool = True,
             update_x_ref: Optional[Dict[str, int]] = None,
             preprocess_fn: Optional[Callable] = None,
@@ -504,6 +505,7 @@ def __init__(
 
         # optionally already preprocess reference data
         self.p_val = p_val
+        self.estimator = estimator
         if preprocess_x_ref and isinstance(preprocess_fn, Callable):  # type: ignore[arg-type]
             self.x_ref = preprocess_fn(x_ref)
         else:
diff --git a/alibi_detect/cd/mmd.py b/alibi_detect/cd/mmd.py
index 8d0bf4ba6..0ffabe166 100644
--- a/alibi_detect/cd/mmd.py
+++ b/alibi_detect/cd/mmd.py
@@ -18,6 +18,7 @@ def __init__(
             x_ref: Union[np.ndarray, list],
             backend: str = 'tensorflow',
             p_val: float = .05,
+            estimator: str = 'quad',
             preprocess_x_ref: bool = True,
             update_x_ref: Optional[Dict[str, int]] = None,
             preprocess_fn: Optional[Callable] = None,
@@ -40,6 +41,8 @@ def __init__(
             Backend used for the MMD implementation.
         p_val
             p-value used for the significance of the permutation test.
+        estimator
+            Estimator used for the MMD^2 computation {'quad', 'linear'}.
         preprocess_x_ref
             Whether to already preprocess and store the reference data.
         update_x_ref
diff --git a/alibi_detect/cd/pytorch/mmd.py b/alibi_detect/cd/pytorch/mmd.py
index a98a39a32..f3c0a3a92 100644
--- a/alibi_detect/cd/pytorch/mmd.py
+++ b/alibi_detect/cd/pytorch/mmd.py
@@ -3,7 +3,7 @@
 import torch
 from typing import Callable, Dict, Optional, Tuple, Union
 from alibi_detect.cd.base import BaseMMDDrift
-from alibi_detect.utils.pytorch.distance import mmd2_from_kernel_matrix
+from alibi_detect.utils.pytorch.distance import mmd2_from_kernel_matrix, linear_mmd2
 from alibi_detect.utils.pytorch.kernels import GaussianRBF
 
 logger = logging.getLogger(__name__)
@@ -14,6 +14,7 @@ def __init__(
             self,
             x_ref: Union[np.ndarray, list],
             p_val: float = .05,
+            estimator: Optional[str] = 'quad',
             preprocess_x_ref: bool = True,
             update_x_ref: Optional[Dict[str, int]] = None,
             preprocess_fn: Optional[Callable] = None,
@@ -34,6 +35,8 @@ def __init__(
             Data used as reference distribution.
         p_val
             p-value used for the significance of the permutation test.
+        estimator
+            Estimator used for the MMD^2 computation {'quad', 'linear'}.
         preprocess_x_ref
             Whether to already preprocess and store the reference data.
         update_x_ref
@@ -62,6 +65,7 @@ def __init__(
         super().__init__(
             x_ref=x_ref,
             p_val=p_val,
+            estimator=estimator,
             preprocess_x_ref=preprocess_x_ref,
             update_x_ref=update_x_ref,
             preprocess_fn=preprocess_fn,
@@ -122,13 +126,23 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray]:
         x = torch.from_numpy(x).to(self.device)  # type: ignore[assignment]
         # compute kernel matrix, MMD^2 and apply permutation test using the kernel matrix
         n = x.shape[0]
-        kernel_mat = self.kernel_matrix(x_ref, x)  # type: ignore[arg-type]
-        kernel_mat = kernel_mat - torch.diag(kernel_mat.diag())  # zero diagonal
-        mmd2 = mmd2_from_kernel_matrix(kernel_mat, n, permute=False, zero_diag=False)
-        mmd2_permuted = torch.Tensor(
-            [mmd2_from_kernel_matrix(kernel_mat, n, permute=True, zero_diag=False) for _ in range(self.n_permutations)]
-        )
-        if self.device.type == 'cuda':
-            mmd2, mmd2_permuted = mmd2.cpu(), mmd2_permuted.cpu()
-        p_val = (mmd2 <= mmd2_permuted).float().mean()
+        m = x_ref.shape[0]
+        if self.estimator == 'linear':
+            n_hat = int(np.floor(min(n, m) / 2) * 2)
+            x_ref = x_ref[:n_hat, :]
+            x = x[:n_hat, :]
+            mmd2 = linear_mmd2(x_ref, x, self.kernel, permute=False)
+            mmd2_permuted = torch.tensor([linear_mmd2(x_ref, x, self.kernel, permute=True)
+                                          for _ in range(self.n_permutations)])
+            p_val = (mmd2 <= mmd2_permuted).float().mean()
+        elif self.estimator == 'quad': 
+            kernel_mat = self.kernel_matrix(x_ref, x)  # type: ignore[arg-type]
+            kernel_mat = kernel_mat - torch.diag(kernel_mat.diag())  # zero diagonal
+            mmd2 = mmd2_from_kernel_matrix(kernel_mat, n, permute=False, zero_diag=False)
+            mmd2_permuted = torch.Tensor(
+                [mmd2_from_kernel_matrix(kernel_mat, n, permute=True, zero_diag=False) for _ in range(self.n_permutations)]
+            )
+            if self.device.type == 'cuda':
+                mmd2, mmd2_permuted = mmd2.cpu(), mmd2_permuted.cpu()
+            p_val = (mmd2 <= mmd2_permuted).float().mean()
         return p_val.numpy().item(), mmd2.numpy().item(), mmd2_permuted.numpy()
diff --git a/alibi_detect/cd/tensorflow/mmd.py b/alibi_detect/cd/tensorflow/mmd.py
index 16ae6fb2b..0f6ba38c8 100644
--- a/alibi_detect/cd/tensorflow/mmd.py
+++ b/alibi_detect/cd/tensorflow/mmd.py
@@ -3,7 +3,7 @@
 import tensorflow as tf
 from typing import Callable, Dict, Optional, Tuple, Union
 from alibi_detect.cd.base import BaseMMDDrift
-from alibi_detect.utils.tensorflow.distance import mmd2_from_kernel_matrix
+from alibi_detect.utils.tensorflow.distance import mmd2_from_kernel_matrix, linear_mmd2
 from alibi_detect.utils.tensorflow.kernels import GaussianRBF
 
 logger = logging.getLogger(__name__)
@@ -14,6 +14,7 @@ def __init__(
             self,
             x_ref: Union[np.ndarray, list],
             p_val: float = .05,
+            estimator: str = 'quad',
             preprocess_x_ref: bool = True,
             update_x_ref: Optional[Dict[str, int]] = None,
             preprocess_fn: Optional[Callable] = None,
@@ -33,6 +34,8 @@ def __init__(
             Data used as reference distribution.
         p_val
             p-value used for the significance of the permutation test.
+        estimator
+            Estimator used for the MMD^2 computation {'quad', 'linear'}.
         preprocess_x_ref
             Whether to already preprocess and store the reference data.
         update_x_ref
@@ -58,6 +61,7 @@ def __init__(
         super().__init__(
             x_ref=x_ref,
             p_val=p_val,
+            estimator=estimator,
             preprocess_x_ref=preprocess_x_ref,
             update_x_ref=update_x_ref,
             preprocess_fn=preprocess_fn,
@@ -107,12 +111,21 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray]:
         x_ref, x = self.preprocess(x)
         # compute kernel matrix, MMD^2 and apply permutation test using the kernel matrix
         n = x.shape[0]
-        kernel_mat = self.kernel_matrix(x_ref, x)
-        kernel_mat = kernel_mat - tf.linalg.diag(tf.linalg.diag_part(kernel_mat))  # zero diagonal
-        mmd2 = mmd2_from_kernel_matrix(kernel_mat, n, permute=False, zero_diag=False).numpy()
-        mmd2_permuted = np.array(
-            [mmd2_from_kernel_matrix(kernel_mat, n, permute=True, zero_diag=False).numpy()
-             for _ in range(self.n_permutations)]
-        )
-        p_val = (mmd2 <= mmd2_permuted).mean()
+        m = x_ref.shape[0]
+        if self.estimator == 'linear':
+            n_hat = int(np.floor(min(n, m) / 2) * 2)
+            x_ref = x_ref[:n_hat, :]
+            x = x[:n_hat, :]
+            mmd2 = linear_mmd2(x_ref, x, self.kernel, permute=False).numpy()
+            mmd2_permuted = np.array([linear_mmd2(x_ref, x, self.kernel, permute=True).numpy()
+                                      for _ in range(self.n_permutations)])
+            p_val = (mmd2 <= mmd2_permuted).mean() 
+        elif self.estimator == 'quad':
+            kernel_mat = self.kernel_matrix(x_ref, x)
+            kernel_mat = kernel_mat - tf.linalg.diag(tf.linalg.diag_part(kernel_mat))  # zero diagonal
+            mmd2 = mmd2_from_kernel_matrix(kernel_mat, n, permute=False, zero_diag=False).numpy()
+            mmd2_permuted = np.array(
+                [mmd2_from_kernel_matrix(kernel_mat, n, permute=True, zero_diag=False).numpy()
+                for _ in range(self.n_permutations)])
+            p_val = (mmd2 <= mmd2_permuted).mean()        
         return p_val, mmd2, mmd2_permuted
diff --git a/alibi_detect/utils/pytorch/distance.py b/alibi_detect/utils/pytorch/distance.py
index b5b5e85de..76400254b 100644
--- a/alibi_detect/utils/pytorch/distance.py
+++ b/alibi_detect/utils/pytorch/distance.py
@@ -30,6 +30,28 @@ def squared_pairwise_distance(x: torch.Tensor, y: torch.Tensor, a_min: float = 1
     return dist.clamp_min_(a_min)
 
 
+def squared_distance(x: torch.Tensor, y: torch.Tensor, a_min: float = 1e-30) -> torch.Tensor:
+    """
+    PyTorch squared Euclidean distance between samples x and y.
+
+    Parameters
+    ----------
+    x
+        Batch of instances of shape [N, features].
+    y
+        Batch of instances of shape [N, features].
+    a_min
+        Lower bound to clip distance values.
+    Returns
+    -------
+    Squared Euclidean distance [N, 1].
+    """
+    x2 = x.pow(2).sum(dim=-1, keepdim=True)
+    y2 = y.pow(2).sum(dim=-1, keepdim=True)
+    dist = x2 + y2 - (2 * x * y).sum(dim=-1, keepdim=True)
+    return dist.clamp_min_(a_min)
+
+
 def batch_compute_kernel_matrix(
     x: Union[list, np.ndarray, torch.Tensor],
     y: Union[list, np.ndarray, torch.Tensor],
@@ -93,8 +115,52 @@ def batch_compute_kernel_matrix(
     return k_mat
 
 
-def mmd2_from_kernel_matrix(kernel_mat: torch.Tensor, m: int, permute: bool = False,
-                            zero_diag: bool = True) -> torch.Tensor:
+def linear_mmd2(x: torch.Tensor, 
+                y: torch.Tensor, 
+                kernel: Callable,
+                permute: bool = False) -> float:
+    """
+    Compute maximum mean discrepancy (MMD^2) between 2 samples x and y with the 
+    linear-time estimator.
+    
+    Parameters
+    ----------
+    x
+        Batch of instances of shape [Nx, features].
+    y
+        Batch of instances of shape [Ny, features].
+    kernel
+        Kernel function.
+    permute
+        Whether to permute the row indices. Used for permutation tests.
+    """
+    n = np.shape(x)[0]
+    m = np.shape(y)[0]
+    if n != m:
+        raise RuntimeError("Linear-time estimator requires equal size samples")
+    if not permute:
+        k_xx = kernel(x=x[0::2, :], y=x[1::2, :], diag=True)
+        k_yy = kernel(x=y[0::2, :], y=y[1::2, :], diag=True)
+        k_xy = kernel(x=x[0::2, :], y=y[1::2, :], diag=True)
+        k_yz = kernel(x=y[0::2, :], y=x[1::2, :], diag=True)
+    else:
+        idx = np.random.permutation(m + n)
+        xy = torch.cat([x, y], dim=0)[idx, :]
+        x_hat, y_hat = xy[:n, :], xy[n:, :]
+        k_xx = kernel(x_hat[0::2, :], x_hat[1::2, :], diag=True)
+        k_yy = kernel(y_hat[0::2, :], y_hat[1::2, :], diag=True)
+        k_xy = kernel(x_hat[0::2, :], y_hat[1::2, :], diag=True)
+        k_yz = kernel(y_hat[0::2, :], x_hat[1::2, :], diag=True)
+        
+    h = k_xx + k_yy - k_xy - k_yz
+    return torch.sum(h) / (n / 2)
+
+
+def mmd2_from_kernel_matrix(kernel_mat: torch.Tensor, 
+                            m: int, 
+                            permute: bool = False,
+                            zero_diag: bool = True,
+                            estimator: str = 'quad') -> torch.Tensor:
     """
     Compute maximum mean discrepancy (MMD^2) between 2 samples x and y from the
     full kernel matrix between the samples.
diff --git a/alibi_detect/utils/pytorch/kernels.py b/alibi_detect/utils/pytorch/kernels.py
index 7d01005b8..4ccef96a5 100644
--- a/alibi_detect/utils/pytorch/kernels.py
+++ b/alibi_detect/utils/pytorch/kernels.py
@@ -68,16 +68,25 @@ def __init__(
     def sigma(self) -> torch.Tensor:
         return self.log_sigma.exp()
 
-    def forward(self, x: Union[np.ndarray, torch.Tensor], y: Union[np.ndarray, torch.Tensor],
-                infer_sigma: bool = False) -> torch.Tensor:
+    def forward(self, x: Union[np.ndarray, torch.Tensor], 
+                y: Union[np.ndarray, torch.Tensor],
+                infer_sigma: bool = False,
+                diag: bool = False) -> torch.Tensor:
 
         x, y = torch.as_tensor(x), torch.as_tensor(y)
-        dist = distance.squared_pairwise_distance(x.flatten(1), y.flatten(1))  # [Nx, Ny]
+        if not diag:
+            dist = distance.squared_pairwise_distance(x.flatten(1), y.flatten(1))  # [Nx, Ny]
+        else:
+            dist = distance.squared_distance(x.flatten(1), y.flatten(1))  # [N, 1]
 
         if infer_sigma or self.init_required:
             if self.trainable and infer_sigma:
                 raise ValueError("Gradients cannot be computed w.r.t. an inferred sigma value")
-            sigma = self.init_sigma_fn(x, y, dist)
+            if not diag:
+                dist_hat = dist
+            else:
+                dist_hat = distance.squared_pairwise_distance(x.flatten(1), y.flatten(1))
+            sigma = self.init_sigma_fn(x, y, dist_hat)
             with torch.no_grad():
                 self.log_sigma.copy_(sigma.log().clone())
             self.init_required = False
diff --git a/alibi_detect/utils/tensorflow/distance.py b/alibi_detect/utils/tensorflow/distance.py
index 7b003fd7f..e14a6def1 100644
--- a/alibi_detect/utils/tensorflow/distance.py
+++ b/alibi_detect/utils/tensorflow/distance.py
@@ -31,6 +31,32 @@ def squared_pairwise_distance(x: tf.Tensor, y: tf.Tensor, a_min: float = 1e-30,
     return tf.clip_by_value(dist, a_min, a_max)
 
 
+def squared_distance(x: tf.Tensor, y: tf.Tensor, 
+                     a_min: float = 1e-30, a_max: float = 1e30) -> tf.Tensor:
+    """
+    TensorFlow squared Euclidean distance between samples x and y.
+
+    Parameters
+    ----------
+    x
+        Batch of instances of shape [N, features].
+    y
+        Batch of instances of shape [N, features].
+    a_min
+        Lower bound to clip distance values.
+    a_max
+        Upper bound to clip distance values.
+
+    Returns
+    -------
+    Squared Euclidean distance [N, 1].
+    """
+    x2 = tf.reduce_sum(x ** 2, axis=-1, keepdims=True)
+    y2 = tf.reduce_sum(y ** 2, axis=-1, keepdims=True)
+    dist = x2 + y2 - 2. * tf.reduce_sum(x * y, axis=-1, keepdims=True)
+    return tf.clip_by_value(dist, a_min, a_max)
+
+
 def batch_compute_kernel_matrix(
     x: Union[list, np.ndarray, tf.Tensor],
     y: Union[list, np.ndarray, tf.Tensor],
@@ -83,7 +109,50 @@ def batch_compute_kernel_matrix(
     return k_mat
 
 
-def mmd2_from_kernel_matrix(kernel_mat: tf.Tensor, m: int, permute: bool = False,
+def linear_mmd2(x: tf.Tensor, 
+                y: tf.Tensor, 
+                kernel: Callable,
+                permute: bool = False) -> float:
+    """
+    Compute maximum mean discrepancy (MMD^2) between 2 samples x and y with the 
+    linear-time estimator.
+    
+    Parameters
+    ----------
+    x
+        Batch of instances of shape [Nx, features].
+    y
+        Batch of instances of shape [Ny, features].
+    kernel
+        Kernel function.
+    permute
+        Whether to permute the row indices. Used for permutation tests.
+    """
+    n = np.shape(x)[0]
+    m = np.shape(y)[0]
+    if n != m:
+        raise RuntimeError("Linear-time estimator requires equal size samples")
+    if not permute:
+        k_xx = kernel(x=x[0::2, :], y=x[1::2, :], diag=True)
+        k_yy = kernel(x=y[0::2, :], y=y[1::2, :], diag=True)
+        k_xy = kernel(x=x[0::2, :], y=y[1::2, :], diag=True)
+        k_yz = kernel(x=y[0::2, :], y=x[1::2, :], diag=True)
+    else:
+        idx = np.random.permutation(m + n)
+        xy = tf.gather(tf.concat([x, y], axis=0), idx)
+        x_hat, y_hat = xy[:n, :], xy[n:, :]
+        k_xx = kernel(x_hat[0::2, :], x_hat[1::2, :], diag=True)
+        k_yy = kernel(y_hat[0::2, :], y_hat[1::2, :], diag=True)
+        k_xy = kernel(x_hat[0::2, :], y_hat[1::2, :], diag=True)
+        k_yz = kernel(y_hat[0::2, :], x_hat[1::2, :], diag=True)
+        
+    h = k_xx + k_yy - k_xy - k_yz
+    return tf.reduce_sum(h) / (n / 2)
+
+
+def mmd2_from_kernel_matrix(kernel_mat: tf.Tensor, 
+                            m: int, 
+                            permute: bool = False,
                             zero_diag: bool = True) -> tf.Tensor:
     """
     Compute maximum mean discrepancy (MMD^2) between 2 samples x and y from the
diff --git a/alibi_detect/utils/tensorflow/kernels.py b/alibi_detect/utils/tensorflow/kernels.py
index 2e894579e..a9a7cca5f 100644
--- a/alibi_detect/utils/tensorflow/kernels.py
+++ b/alibi_detect/utils/tensorflow/kernels.py
@@ -69,14 +69,23 @@ def __init__(
     def sigma(self) -> tf.Tensor:
         return tf.math.exp(self.log_sigma)
 
-    def call(self, x: tf.Tensor, y: tf.Tensor, infer_sigma: bool = False) -> tf.Tensor:
+    def call(self, x: tf.Tensor, y: tf.Tensor, 
+             infer_sigma: bool = False,
+             diag: bool = False) -> tf.Tensor:
         y = tf.cast(y, x.dtype)
         x, y = tf.reshape(x, (x.shape[0], -1)), tf.reshape(y, (y.shape[0], -1))  # flatten
-        dist = distance.squared_pairwise_distance(x, y)  # [Nx, Ny]
+        if not diag:
+            dist = distance.squared_pairwise_distance(x, y)  # [Nx, Ny]
+        else:
+            dist = distance.squared_distance(x, y)  # [Nx]
 
         if infer_sigma or self.init_required:
             if self.trainable and infer_sigma:
                 raise ValueError("Gradients cannot be computed w.r.t. an inferred sigma value")
+            if not diag:
+                dist_hat = dist
+            else:
+                dist_hat = distance.squared_pairwise_distance(x, y)
             sigma = self.init_sigma_fn(x, y, dist)
             self.log_sigma.assign(tf.math.log(sigma))
             self.init_required = False

From 8ef820d10033e7e4a0aec9591550acd7d77b5102 Mon Sep 17 00:00:00 2001
From: Hao Song <nuacesh@gmail.com>
Date: Fri, 1 Apr 2022 13:32:09 +0100
Subject: [PATCH 02/14] Add support for linear-time MMD estimator.

---
 alibi_detect/cd/base.py                   |  2 +
 alibi_detect/cd/mmd.py                    |  3 +
 alibi_detect/cd/pytorch/mmd.py            | 34 +++++++----
 alibi_detect/cd/tensorflow/mmd.py         | 31 +++++++---
 alibi_detect/utils/pytorch/distance.py    | 70 +++++++++++++++++++++-
 alibi_detect/utils/pytorch/kernels.py     | 17 ++++--
 alibi_detect/utils/tensorflow/distance.py | 71 ++++++++++++++++++++++-
 alibi_detect/utils/tensorflow/kernels.py  | 13 ++++-
 8 files changed, 213 insertions(+), 28 deletions(-)

diff --git a/alibi_detect/cd/base.py b/alibi_detect/cd/base.py
index 43b435464..9f391a48d 100644
--- a/alibi_detect/cd/base.py
+++ b/alibi_detect/cd/base.py
@@ -452,6 +452,7 @@ def __init__(
             self,
             x_ref: Union[np.ndarray, list],
             p_val: float = .05,
+            estimator: str = 'quad',
             preprocess_x_ref: bool = True,
             update_x_ref: Optional[Dict[str, int]] = None,
             preprocess_fn: Optional[Callable] = None,
@@ -504,6 +505,7 @@ def __init__(
 
         # optionally already preprocess reference data
         self.p_val = p_val
+        self.estimator = estimator
         if preprocess_x_ref and isinstance(preprocess_fn, Callable):  # type: ignore[arg-type]
             self.x_ref = preprocess_fn(x_ref)
         else:
diff --git a/alibi_detect/cd/mmd.py b/alibi_detect/cd/mmd.py
index 8d0bf4ba6..0ffabe166 100644
--- a/alibi_detect/cd/mmd.py
+++ b/alibi_detect/cd/mmd.py
@@ -18,6 +18,7 @@ def __init__(
             x_ref: Union[np.ndarray, list],
             backend: str = 'tensorflow',
             p_val: float = .05,
+            estimator: str = 'quad',
             preprocess_x_ref: bool = True,
             update_x_ref: Optional[Dict[str, int]] = None,
             preprocess_fn: Optional[Callable] = None,
@@ -40,6 +41,8 @@ def __init__(
             Backend used for the MMD implementation.
         p_val
             p-value used for the significance of the permutation test.
+        estimator
+            Estimator used for the MMD^2 computation {'quad', 'linear'}.
         preprocess_x_ref
             Whether to already preprocess and store the reference data.
         update_x_ref
diff --git a/alibi_detect/cd/pytorch/mmd.py b/alibi_detect/cd/pytorch/mmd.py
index a98a39a32..f3c0a3a92 100644
--- a/alibi_detect/cd/pytorch/mmd.py
+++ b/alibi_detect/cd/pytorch/mmd.py
@@ -3,7 +3,7 @@
 import torch
 from typing import Callable, Dict, Optional, Tuple, Union
 from alibi_detect.cd.base import BaseMMDDrift
-from alibi_detect.utils.pytorch.distance import mmd2_from_kernel_matrix
+from alibi_detect.utils.pytorch.distance import mmd2_from_kernel_matrix, linear_mmd2
 from alibi_detect.utils.pytorch.kernels import GaussianRBF
 
 logger = logging.getLogger(__name__)
@@ -14,6 +14,7 @@ def __init__(
             self,
             x_ref: Union[np.ndarray, list],
             p_val: float = .05,
+            estimator: Optional[str] = 'quad',
             preprocess_x_ref: bool = True,
             update_x_ref: Optional[Dict[str, int]] = None,
             preprocess_fn: Optional[Callable] = None,
@@ -34,6 +35,8 @@ def __init__(
             Data used as reference distribution.
         p_val
             p-value used for the significance of the permutation test.
+        estimator
+            Estimator used for the MMD^2 computation {'quad', 'linear'}.
         preprocess_x_ref
             Whether to already preprocess and store the reference data.
         update_x_ref
@@ -62,6 +65,7 @@ def __init__(
         super().__init__(
             x_ref=x_ref,
             p_val=p_val,
+            estimator=estimator,
             preprocess_x_ref=preprocess_x_ref,
             update_x_ref=update_x_ref,
             preprocess_fn=preprocess_fn,
@@ -122,13 +126,23 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray]:
         x = torch.from_numpy(x).to(self.device)  # type: ignore[assignment]
         # compute kernel matrix, MMD^2 and apply permutation test using the kernel matrix
         n = x.shape[0]
-        kernel_mat = self.kernel_matrix(x_ref, x)  # type: ignore[arg-type]
-        kernel_mat = kernel_mat - torch.diag(kernel_mat.diag())  # zero diagonal
-        mmd2 = mmd2_from_kernel_matrix(kernel_mat, n, permute=False, zero_diag=False)
-        mmd2_permuted = torch.Tensor(
-            [mmd2_from_kernel_matrix(kernel_mat, n, permute=True, zero_diag=False) for _ in range(self.n_permutations)]
-        )
-        if self.device.type == 'cuda':
-            mmd2, mmd2_permuted = mmd2.cpu(), mmd2_permuted.cpu()
-        p_val = (mmd2 <= mmd2_permuted).float().mean()
+        m = x_ref.shape[0]
+        if self.estimator == 'linear':
+            n_hat = int(np.floor(min(n, m) / 2) * 2)
+            x_ref = x_ref[:n_hat, :]
+            x = x[:n_hat, :]
+            mmd2 = linear_mmd2(x_ref, x, self.kernel, permute=False)
+            mmd2_permuted = torch.tensor([linear_mmd2(x_ref, x, self.kernel, permute=True)
+                                          for _ in range(self.n_permutations)])
+            p_val = (mmd2 <= mmd2_permuted).float().mean()
+        elif self.estimator == 'quad': 
+            kernel_mat = self.kernel_matrix(x_ref, x)  # type: ignore[arg-type]
+            kernel_mat = kernel_mat - torch.diag(kernel_mat.diag())  # zero diagonal
+            mmd2 = mmd2_from_kernel_matrix(kernel_mat, n, permute=False, zero_diag=False)
+            mmd2_permuted = torch.Tensor(
+                [mmd2_from_kernel_matrix(kernel_mat, n, permute=True, zero_diag=False) for _ in range(self.n_permutations)]
+            )
+            if self.device.type == 'cuda':
+                mmd2, mmd2_permuted = mmd2.cpu(), mmd2_permuted.cpu()
+            p_val = (mmd2 <= mmd2_permuted).float().mean()
         return p_val.numpy().item(), mmd2.numpy().item(), mmd2_permuted.numpy()
diff --git a/alibi_detect/cd/tensorflow/mmd.py b/alibi_detect/cd/tensorflow/mmd.py
index 16ae6fb2b..0f6ba38c8 100644
--- a/alibi_detect/cd/tensorflow/mmd.py
+++ b/alibi_detect/cd/tensorflow/mmd.py
@@ -3,7 +3,7 @@
 import tensorflow as tf
 from typing import Callable, Dict, Optional, Tuple, Union
 from alibi_detect.cd.base import BaseMMDDrift
-from alibi_detect.utils.tensorflow.distance import mmd2_from_kernel_matrix
+from alibi_detect.utils.tensorflow.distance import mmd2_from_kernel_matrix, linear_mmd2
 from alibi_detect.utils.tensorflow.kernels import GaussianRBF
 
 logger = logging.getLogger(__name__)
@@ -14,6 +14,7 @@ def __init__(
             self,
             x_ref: Union[np.ndarray, list],
             p_val: float = .05,
+            estimator: str = 'quad',
             preprocess_x_ref: bool = True,
             update_x_ref: Optional[Dict[str, int]] = None,
             preprocess_fn: Optional[Callable] = None,
@@ -33,6 +34,8 @@ def __init__(
             Data used as reference distribution.
         p_val
             p-value used for the significance of the permutation test.
+        estimator
+            Estimator used for the MMD^2 computation {'quad', 'linear'}.
         preprocess_x_ref
             Whether to already preprocess and store the reference data.
         update_x_ref
@@ -58,6 +61,7 @@ def __init__(
         super().__init__(
             x_ref=x_ref,
             p_val=p_val,
+            estimator=estimator,
             preprocess_x_ref=preprocess_x_ref,
             update_x_ref=update_x_ref,
             preprocess_fn=preprocess_fn,
@@ -107,12 +111,21 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray]:
         x_ref, x = self.preprocess(x)
         # compute kernel matrix, MMD^2 and apply permutation test using the kernel matrix
         n = x.shape[0]
-        kernel_mat = self.kernel_matrix(x_ref, x)
-        kernel_mat = kernel_mat - tf.linalg.diag(tf.linalg.diag_part(kernel_mat))  # zero diagonal
-        mmd2 = mmd2_from_kernel_matrix(kernel_mat, n, permute=False, zero_diag=False).numpy()
-        mmd2_permuted = np.array(
-            [mmd2_from_kernel_matrix(kernel_mat, n, permute=True, zero_diag=False).numpy()
-             for _ in range(self.n_permutations)]
-        )
-        p_val = (mmd2 <= mmd2_permuted).mean()
+        m = x_ref.shape[0]
+        if self.estimator == 'linear':
+            n_hat = int(np.floor(min(n, m) / 2) * 2)
+            x_ref = x_ref[:n_hat, :]
+            x = x[:n_hat, :]
+            mmd2 = linear_mmd2(x_ref, x, self.kernel, permute=False).numpy()
+            mmd2_permuted = np.array([linear_mmd2(x_ref, x, self.kernel, permute=True).numpy()
+                                      for _ in range(self.n_permutations)])
+            p_val = (mmd2 <= mmd2_permuted).mean() 
+        elif self.estimator == 'quad':
+            kernel_mat = self.kernel_matrix(x_ref, x)
+            kernel_mat = kernel_mat - tf.linalg.diag(tf.linalg.diag_part(kernel_mat))  # zero diagonal
+            mmd2 = mmd2_from_kernel_matrix(kernel_mat, n, permute=False, zero_diag=False).numpy()
+            mmd2_permuted = np.array(
+                [mmd2_from_kernel_matrix(kernel_mat, n, permute=True, zero_diag=False).numpy()
+                for _ in range(self.n_permutations)])
+            p_val = (mmd2 <= mmd2_permuted).mean()        
         return p_val, mmd2, mmd2_permuted
diff --git a/alibi_detect/utils/pytorch/distance.py b/alibi_detect/utils/pytorch/distance.py
index b5b5e85de..76400254b 100644
--- a/alibi_detect/utils/pytorch/distance.py
+++ b/alibi_detect/utils/pytorch/distance.py
@@ -30,6 +30,28 @@ def squared_pairwise_distance(x: torch.Tensor, y: torch.Tensor, a_min: float = 1
     return dist.clamp_min_(a_min)
 
 
+def squared_distance(x: torch.Tensor, y: torch.Tensor, a_min: float = 1e-30) -> torch.Tensor:
+    """
+    PyTorch squared Euclidean distance between samples x and y.
+
+    Parameters
+    ----------
+    x
+        Batch of instances of shape [N, features].
+    y
+        Batch of instances of shape [N, features].
+    a_min
+        Lower bound to clip distance values.
+    Returns
+    -------
+    Squared Euclidean distance [N, 1].
+    """
+    x2 = x.pow(2).sum(dim=-1, keepdim=True)
+    y2 = y.pow(2).sum(dim=-1, keepdim=True)
+    dist = x2 + y2 - (2 * x * y).sum(dim=-1, keepdim=True)
+    return dist.clamp_min_(a_min)
+
+
 def batch_compute_kernel_matrix(
     x: Union[list, np.ndarray, torch.Tensor],
     y: Union[list, np.ndarray, torch.Tensor],
@@ -93,8 +115,52 @@ def batch_compute_kernel_matrix(
     return k_mat
 
 
-def mmd2_from_kernel_matrix(kernel_mat: torch.Tensor, m: int, permute: bool = False,
-                            zero_diag: bool = True) -> torch.Tensor:
+def linear_mmd2(x: torch.Tensor, 
+                y: torch.Tensor, 
+                kernel: Callable,
+                permute: bool = False) -> float:
+    """
+    Compute maximum mean discrepancy (MMD^2) between 2 samples x and y with the 
+    linear-time estimator.
+    
+    Parameters
+    ----------
+    x
+        Batch of instances of shape [Nx, features].
+    y
+        Batch of instances of shape [Ny, features].
+    kernel
+        Kernel function.
+    permute
+        Whether to permute the row indices. Used for permutation tests.
+    """
+    n = np.shape(x)[0]
+    m = np.shape(y)[0]
+    if n != m:
+        raise RuntimeError("Linear-time estimator requires equal size samples")
+    if not permute:
+        k_xx = kernel(x=x[0::2, :], y=x[1::2, :], diag=True)
+        k_yy = kernel(x=y[0::2, :], y=y[1::2, :], diag=True)
+        k_xy = kernel(x=x[0::2, :], y=y[1::2, :], diag=True)
+        k_yz = kernel(x=y[0::2, :], y=x[1::2, :], diag=True)
+    else:
+        idx = np.random.permutation(m + n)
+        xy = torch.cat([x, y], dim=0)[idx, :]
+        x_hat, y_hat = xy[:n, :], xy[n:, :]
+        k_xx = kernel(x_hat[0::2, :], x_hat[1::2, :], diag=True)
+        k_yy = kernel(y_hat[0::2, :], y_hat[1::2, :], diag=True)
+        k_xy = kernel(x_hat[0::2, :], y_hat[1::2, :], diag=True)
+        k_yz = kernel(y_hat[0::2, :], x_hat[1::2, :], diag=True)
+        
+    h = k_xx + k_yy - k_xy - k_yz
+    return torch.sum(h) / (n / 2)
+
+
+def mmd2_from_kernel_matrix(kernel_mat: torch.Tensor, 
+                            m: int, 
+                            permute: bool = False,
+                            zero_diag: bool = True,
+                            estimator: str = 'quad') -> torch.Tensor:
     """
     Compute maximum mean discrepancy (MMD^2) between 2 samples x and y from the
     full kernel matrix between the samples.
diff --git a/alibi_detect/utils/pytorch/kernels.py b/alibi_detect/utils/pytorch/kernels.py
index 7d01005b8..4ccef96a5 100644
--- a/alibi_detect/utils/pytorch/kernels.py
+++ b/alibi_detect/utils/pytorch/kernels.py
@@ -68,16 +68,25 @@ def __init__(
     def sigma(self) -> torch.Tensor:
         return self.log_sigma.exp()
 
-    def forward(self, x: Union[np.ndarray, torch.Tensor], y: Union[np.ndarray, torch.Tensor],
-                infer_sigma: bool = False) -> torch.Tensor:
+    def forward(self, x: Union[np.ndarray, torch.Tensor], 
+                y: Union[np.ndarray, torch.Tensor],
+                infer_sigma: bool = False,
+                diag: bool = False) -> torch.Tensor:
 
         x, y = torch.as_tensor(x), torch.as_tensor(y)
-        dist = distance.squared_pairwise_distance(x.flatten(1), y.flatten(1))  # [Nx, Ny]
+        if not diag:
+            dist = distance.squared_pairwise_distance(x.flatten(1), y.flatten(1))  # [Nx, Ny]
+        else:
+            dist = distance.squared_distance(x.flatten(1), y.flatten(1))  # [N, 1]
 
         if infer_sigma or self.init_required:
             if self.trainable and infer_sigma:
                 raise ValueError("Gradients cannot be computed w.r.t. an inferred sigma value")
-            sigma = self.init_sigma_fn(x, y, dist)
+            if not diag:
+                dist_hat = dist
+            else:
+                dist_hat = distance.squared_pairwise_distance(x.flatten(1), y.flatten(1))
+            sigma = self.init_sigma_fn(x, y, dist_hat)
             with torch.no_grad():
                 self.log_sigma.copy_(sigma.log().clone())
             self.init_required = False
diff --git a/alibi_detect/utils/tensorflow/distance.py b/alibi_detect/utils/tensorflow/distance.py
index 7b003fd7f..e14a6def1 100644
--- a/alibi_detect/utils/tensorflow/distance.py
+++ b/alibi_detect/utils/tensorflow/distance.py
@@ -31,6 +31,32 @@ def squared_pairwise_distance(x: tf.Tensor, y: tf.Tensor, a_min: float = 1e-30,
     return tf.clip_by_value(dist, a_min, a_max)
 
 
+def squared_distance(x: tf.Tensor, y: tf.Tensor, 
+                     a_min: float = 1e-30, a_max: float = 1e30) -> tf.Tensor:
+    """
+    TensorFlow squared Euclidean distance between samples x and y.
+
+    Parameters
+    ----------
+    x
+        Batch of instances of shape [N, features].
+    y
+        Batch of instances of shape [N, features].
+    a_min
+        Lower bound to clip distance values.
+    a_max
+        Upper bound to clip distance values.
+
+    Returns
+    -------
+    Squared Euclidean distance [N, 1].
+    """
+    x2 = tf.reduce_sum(x ** 2, axis=-1, keepdims=True)
+    y2 = tf.reduce_sum(y ** 2, axis=-1, keepdims=True)
+    dist = x2 + y2 - 2. * tf.reduce_sum(x * y, axis=-1, keepdims=True)
+    return tf.clip_by_value(dist, a_min, a_max)
+
+
 def batch_compute_kernel_matrix(
     x: Union[list, np.ndarray, tf.Tensor],
     y: Union[list, np.ndarray, tf.Tensor],
@@ -83,7 +109,50 @@ def batch_compute_kernel_matrix(
     return k_mat
 
 
-def mmd2_from_kernel_matrix(kernel_mat: tf.Tensor, m: int, permute: bool = False,
+def linear_mmd2(x: tf.Tensor, 
+                y: tf.Tensor, 
+                kernel: Callable,
+                permute: bool = False) -> float:
+    """
+    Compute maximum mean discrepancy (MMD^2) between 2 samples x and y with the 
+    linear-time estimator.
+    
+    Parameters
+    ----------
+    x
+        Batch of instances of shape [Nx, features].
+    y
+        Batch of instances of shape [Ny, features].
+    kernel
+        Kernel function.
+    permute
+        Whether to permute the row indices. Used for permutation tests.
+    """
+    n = np.shape(x)[0]
+    m = np.shape(y)[0]
+    if n != m:
+        raise RuntimeError("Linear-time estimator requires equal size samples")
+    if not permute:
+        k_xx = kernel(x=x[0::2, :], y=x[1::2, :], diag=True)
+        k_yy = kernel(x=y[0::2, :], y=y[1::2, :], diag=True)
+        k_xy = kernel(x=x[0::2, :], y=y[1::2, :], diag=True)
+        k_yz = kernel(x=y[0::2, :], y=x[1::2, :], diag=True)
+    else:
+        idx = np.random.permutation(m + n)
+        xy = tf.gather(tf.concat([x, y], axis=0), idx)
+        x_hat, y_hat = xy[:n, :], xy[n:, :]
+        k_xx = kernel(x_hat[0::2, :], x_hat[1::2, :], diag=True)
+        k_yy = kernel(y_hat[0::2, :], y_hat[1::2, :], diag=True)
+        k_xy = kernel(x_hat[0::2, :], y_hat[1::2, :], diag=True)
+        k_yz = kernel(y_hat[0::2, :], x_hat[1::2, :], diag=True)
+        
+    h = k_xx + k_yy - k_xy - k_yz
+    return tf.reduce_sum(h) / (n / 2)
+
+
+def mmd2_from_kernel_matrix(kernel_mat: tf.Tensor, 
+                            m: int, 
+                            permute: bool = False,
                             zero_diag: bool = True) -> tf.Tensor:
     """
     Compute maximum mean discrepancy (MMD^2) between 2 samples x and y from the
diff --git a/alibi_detect/utils/tensorflow/kernels.py b/alibi_detect/utils/tensorflow/kernels.py
index b6b692237..d424116a9 100644
--- a/alibi_detect/utils/tensorflow/kernels.py
+++ b/alibi_detect/utils/tensorflow/kernels.py
@@ -69,14 +69,23 @@ def __init__(
     def sigma(self) -> tf.Tensor:
         return tf.math.exp(self.log_sigma)
 
-    def call(self, x: tf.Tensor, y: tf.Tensor, infer_sigma: bool = False) -> tf.Tensor:
+    def call(self, x: tf.Tensor, y: tf.Tensor, 
+             infer_sigma: bool = False,
+             diag: bool = False) -> tf.Tensor:
         y = tf.cast(y, x.dtype)
         x, y = tf.reshape(x, (x.shape[0], -1)), tf.reshape(y, (y.shape[0], -1))  # flatten
-        dist = distance.squared_pairwise_distance(x, y)  # [Nx, Ny]
+        if not diag:
+            dist = distance.squared_pairwise_distance(x, y)  # [Nx, Ny]
+        else:
+            dist = distance.squared_distance(x, y)  # [Nx]
 
         if infer_sigma or self.init_required:
             if self.trainable and infer_sigma:
                 raise ValueError("Gradients cannot be computed w.r.t. an inferred sigma value")
+            if not diag:
+                dist_hat = dist
+            else:
+                dist_hat = distance.squared_pairwise_distance(x, y)
             sigma = self.init_sigma_fn(x, y, dist)
             self.log_sigma.assign(tf.math.log(sigma))
             self.init_required = False

From eaa2e4500df651cdcbf4298f1f7e9410b631c519 Mon Sep 17 00:00:00 2001
From: Hao Song <nuacesh@gmail.com>
Date: Fri, 1 Apr 2022 14:28:03 +0100
Subject: [PATCH 03/14] Minor fixes.

---
 alibi_detect/cd/pytorch/mmd.py            |  5 +++--
 alibi_detect/cd/tensorflow/mmd.py         |  6 +++---
 alibi_detect/utils/pytorch/distance.py    | 14 +++++++-------
 alibi_detect/utils/pytorch/kernels.py     |  2 +-
 alibi_detect/utils/tensorflow/distance.py | 16 ++++++++--------
 alibi_detect/utils/tensorflow/kernels.py  |  4 ++--
 6 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/alibi_detect/cd/pytorch/mmd.py b/alibi_detect/cd/pytorch/mmd.py
index f3c0a3a92..feb223cb1 100644
--- a/alibi_detect/cd/pytorch/mmd.py
+++ b/alibi_detect/cd/pytorch/mmd.py
@@ -135,12 +135,13 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray]:
             mmd2_permuted = torch.tensor([linear_mmd2(x_ref, x, self.kernel, permute=True)
                                           for _ in range(self.n_permutations)])
             p_val = (mmd2 <= mmd2_permuted).float().mean()
-        elif self.estimator == 'quad': 
+        elif self.estimator == 'quad':
             kernel_mat = self.kernel_matrix(x_ref, x)  # type: ignore[arg-type]
             kernel_mat = kernel_mat - torch.diag(kernel_mat.diag())  # zero diagonal
             mmd2 = mmd2_from_kernel_matrix(kernel_mat, n, permute=False, zero_diag=False)
             mmd2_permuted = torch.Tensor(
-                [mmd2_from_kernel_matrix(kernel_mat, n, permute=True, zero_diag=False) for _ in range(self.n_permutations)]
+                [mmd2_from_kernel_matrix(kernel_mat, n, permute=True, zero_diag=False)
+                 for _ in range(self.n_permutations)]
             )
             if self.device.type == 'cuda':
                 mmd2, mmd2_permuted = mmd2.cpu(), mmd2_permuted.cpu()
diff --git a/alibi_detect/cd/tensorflow/mmd.py b/alibi_detect/cd/tensorflow/mmd.py
index 0f6ba38c8..ff35f8c6e 100644
--- a/alibi_detect/cd/tensorflow/mmd.py
+++ b/alibi_detect/cd/tensorflow/mmd.py
@@ -119,13 +119,13 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray]:
             mmd2 = linear_mmd2(x_ref, x, self.kernel, permute=False).numpy()
             mmd2_permuted = np.array([linear_mmd2(x_ref, x, self.kernel, permute=True).numpy()
                                       for _ in range(self.n_permutations)])
-            p_val = (mmd2 <= mmd2_permuted).mean() 
+            p_val = (mmd2 <= mmd2_permuted).mean()
         elif self.estimator == 'quad':
             kernel_mat = self.kernel_matrix(x_ref, x)
             kernel_mat = kernel_mat - tf.linalg.diag(tf.linalg.diag_part(kernel_mat))  # zero diagonal
             mmd2 = mmd2_from_kernel_matrix(kernel_mat, n, permute=False, zero_diag=False).numpy()
             mmd2_permuted = np.array(
                 [mmd2_from_kernel_matrix(kernel_mat, n, permute=True, zero_diag=False).numpy()
-                for _ in range(self.n_permutations)])
-            p_val = (mmd2 <= mmd2_permuted).mean()        
+                 for _ in range(self.n_permutations)])
+            p_val = (mmd2 <= mmd2_permuted).mean()
         return p_val, mmd2, mmd2_permuted
diff --git a/alibi_detect/utils/pytorch/distance.py b/alibi_detect/utils/pytorch/distance.py
index 76400254b..6fc6f8fa6 100644
--- a/alibi_detect/utils/pytorch/distance.py
+++ b/alibi_detect/utils/pytorch/distance.py
@@ -115,14 +115,14 @@ def batch_compute_kernel_matrix(
     return k_mat
 
 
-def linear_mmd2(x: torch.Tensor, 
-                y: torch.Tensor, 
+def linear_mmd2(x: torch.Tensor,
+                y: torch.Tensor,
                 kernel: Callable,
                 permute: bool = False) -> float:
     """
-    Compute maximum mean discrepancy (MMD^2) between 2 samples x and y with the 
+    Compute maximum mean discrepancy (MMD^2) between 2 samples x and y with the
     linear-time estimator.
-    
+
     Parameters
     ----------
     x
@@ -151,13 +151,13 @@ def linear_mmd2(x: torch.Tensor,
         k_yy = kernel(y_hat[0::2, :], y_hat[1::2, :], diag=True)
         k_xy = kernel(x_hat[0::2, :], y_hat[1::2, :], diag=True)
         k_yz = kernel(y_hat[0::2, :], x_hat[1::2, :], diag=True)
-        
+
     h = k_xx + k_yy - k_xy - k_yz
     return torch.sum(h) / (n / 2)
 
 
-def mmd2_from_kernel_matrix(kernel_mat: torch.Tensor, 
-                            m: int, 
+def mmd2_from_kernel_matrix(kernel_mat: torch.Tensor,
+                            m: int,
                             permute: bool = False,
                             zero_diag: bool = True,
                             estimator: str = 'quad') -> torch.Tensor:
diff --git a/alibi_detect/utils/pytorch/kernels.py b/alibi_detect/utils/pytorch/kernels.py
index 4ccef96a5..2a4327e14 100644
--- a/alibi_detect/utils/pytorch/kernels.py
+++ b/alibi_detect/utils/pytorch/kernels.py
@@ -68,7 +68,7 @@ def __init__(
     def sigma(self) -> torch.Tensor:
         return self.log_sigma.exp()
 
-    def forward(self, x: Union[np.ndarray, torch.Tensor], 
+    def forward(self, x: Union[np.ndarray, torch.Tensor],
                 y: Union[np.ndarray, torch.Tensor],
                 infer_sigma: bool = False,
                 diag: bool = False) -> torch.Tensor:
diff --git a/alibi_detect/utils/tensorflow/distance.py b/alibi_detect/utils/tensorflow/distance.py
index e14a6def1..133cc3e65 100644
--- a/alibi_detect/utils/tensorflow/distance.py
+++ b/alibi_detect/utils/tensorflow/distance.py
@@ -31,7 +31,7 @@ def squared_pairwise_distance(x: tf.Tensor, y: tf.Tensor, a_min: float = 1e-30,
     return tf.clip_by_value(dist, a_min, a_max)
 
 
-def squared_distance(x: tf.Tensor, y: tf.Tensor, 
+def squared_distance(x: tf.Tensor, y: tf.Tensor,
                      a_min: float = 1e-30, a_max: float = 1e30) -> tf.Tensor:
     """
     TensorFlow squared Euclidean distance between samples x and y.
@@ -109,14 +109,14 @@ def batch_compute_kernel_matrix(
     return k_mat
 
 
-def linear_mmd2(x: tf.Tensor, 
-                y: tf.Tensor, 
+def linear_mmd2(x: tf.Tensor,
+                y: tf.Tensor,
                 kernel: Callable,
                 permute: bool = False) -> float:
     """
-    Compute maximum mean discrepancy (MMD^2) between 2 samples x and y with the 
+    Compute maximum mean discrepancy (MMD^2) between 2 samples x and y with the
     linear-time estimator.
-    
+
     Parameters
     ----------
     x
@@ -145,13 +145,13 @@ def linear_mmd2(x: tf.Tensor,
         k_yy = kernel(y_hat[0::2, :], y_hat[1::2, :], diag=True)
         k_xy = kernel(x_hat[0::2, :], y_hat[1::2, :], diag=True)
         k_yz = kernel(y_hat[0::2, :], x_hat[1::2, :], diag=True)
-        
+
     h = k_xx + k_yy - k_xy - k_yz
     return tf.reduce_sum(h) / (n / 2)
 
 
-def mmd2_from_kernel_matrix(kernel_mat: tf.Tensor, 
-                            m: int, 
+def mmd2_from_kernel_matrix(kernel_mat: tf.Tensor,
+                            m: int,
                             permute: bool = False,
                             zero_diag: bool = True) -> tf.Tensor:
     """
diff --git a/alibi_detect/utils/tensorflow/kernels.py b/alibi_detect/utils/tensorflow/kernels.py
index d424116a9..0b2cbc977 100644
--- a/alibi_detect/utils/tensorflow/kernels.py
+++ b/alibi_detect/utils/tensorflow/kernels.py
@@ -69,7 +69,7 @@ def __init__(
     def sigma(self) -> tf.Tensor:
         return tf.math.exp(self.log_sigma)
 
-    def call(self, x: tf.Tensor, y: tf.Tensor, 
+    def call(self, x: tf.Tensor, y: tf.Tensor,
              infer_sigma: bool = False,
              diag: bool = False) -> tf.Tensor:
         y = tf.cast(y, x.dtype)
@@ -86,7 +86,7 @@ def call(self, x: tf.Tensor, y: tf.Tensor,
                 dist_hat = dist
             else:
                 dist_hat = distance.squared_pairwise_distance(x, y)
-            sigma = self.init_sigma_fn(x, y, dist)
+            sigma = self.init_sigma_fn(x, y, dist_hat)
             self.log_sigma.assign(tf.math.log(sigma))
             self.init_required = False
 

From ea97f5220356b401245c75e711143f6359c53dd6 Mon Sep 17 00:00:00 2001
From: Hao Song <nuacesh@gmail.com>
Date: Fri, 1 Apr 2022 16:32:18 +0100
Subject: [PATCH 04/14] minor fixes.

---
 alibi_detect/cd/pytorch/mmd.py         | 23 +++++++++++------------
 alibi_detect/cd/tensorflow/mmd.py      |  4 ++--
 alibi_detect/utils/pytorch/distance.py |  7 +++++--
 3 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/alibi_detect/cd/pytorch/mmd.py b/alibi_detect/cd/pytorch/mmd.py
index feb223cb1..6fc394b45 100644
--- a/alibi_detect/cd/pytorch/mmd.py
+++ b/alibi_detect/cd/pytorch/mmd.py
@@ -122,28 +122,27 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray]:
         and the MMD^2 values from the permutation test.
         """
         x_ref, x = self.preprocess(x)
-        x_ref = torch.from_numpy(x_ref).to(self.device)  # type: ignore[assignment]
-        x = torch.from_numpy(x).to(self.device)  # type: ignore[assignment]
-        # compute kernel matrix, MMD^2 and apply permutation test using the kernel matrix
         n = x.shape[0]
         m = x_ref.shape[0]
         if self.estimator == 'linear':
             n_hat = int(np.floor(min(n, m) / 2) * 2)
-            x_ref = x_ref[:n_hat, :]
-            x = x[:n_hat, :]
-            mmd2 = linear_mmd2(x_ref, x, self.kernel, permute=False)
-            mmd2_permuted = torch.tensor([linear_mmd2(x_ref, x, self.kernel, permute=True)
+            x_ref = torch.from_numpy(x_ref[:n_hat, :]).to(self.device) # type: ignore[assignment]
+            x = torch.from_numpy(x[:n_hat, :]).to(self.device) # type: ignore[assignment]
+            mmd2 = linear_mmd2(x_ref, x, self.kernel, permute=False)  # type: ignore[arg-type]
+            mmd2_permuted = torch.tensor([linear_mmd2(x_ref, x, self.kernel, permute=True)  # type: ignore[arg-type]
                                           for _ in range(self.n_permutations)])
-            p_val = (mmd2 <= mmd2_permuted).float().mean()
         elif self.estimator == 'quad':
+            x_ref = torch.from_numpy(x_ref).to(self.device)  # type: ignore[assignment]
+            x = torch.from_numpy(x).to(self.device)  # type: ignore[assignment]
+            # compute kernel matrix, MMD^2 and apply permutation test using the kernel matrix
             kernel_mat = self.kernel_matrix(x_ref, x)  # type: ignore[arg-type]
             kernel_mat = kernel_mat - torch.diag(kernel_mat.diag())  # zero diagonal
-            mmd2 = mmd2_from_kernel_matrix(kernel_mat, n, permute=False, zero_diag=False)
+            mmd2 = mmd2_from_kernel_matrix(kernel_mat, n, permute=False, zero_diag=False)  # type: ignore[assignment]
             mmd2_permuted = torch.Tensor(
                 [mmd2_from_kernel_matrix(kernel_mat, n, permute=True, zero_diag=False)
                  for _ in range(self.n_permutations)]
             )
-            if self.device.type == 'cuda':
-                mmd2, mmd2_permuted = mmd2.cpu(), mmd2_permuted.cpu()
-            p_val = (mmd2 <= mmd2_permuted).float().mean()
+        if self.device.type == 'cuda':
+            mmd2, mmd2_permuted = mmd2.cpu(), mmd2_permuted.cpu()
+        p_val = (mmd2 <= mmd2_permuted).float().mean()
         return p_val.numpy().item(), mmd2.numpy().item(), mmd2_permuted.numpy()
diff --git a/alibi_detect/cd/tensorflow/mmd.py b/alibi_detect/cd/tensorflow/mmd.py
index ff35f8c6e..fbe655040 100644
--- a/alibi_detect/cd/tensorflow/mmd.py
+++ b/alibi_detect/cd/tensorflow/mmd.py
@@ -116,8 +116,8 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray]:
             n_hat = int(np.floor(min(n, m) / 2) * 2)
             x_ref = x_ref[:n_hat, :]
             x = x[:n_hat, :]
-            mmd2 = linear_mmd2(x_ref, x, self.kernel, permute=False).numpy()
-            mmd2_permuted = np.array([linear_mmd2(x_ref, x, self.kernel, permute=True).numpy()
+            mmd2 = linear_mmd2(x_ref, x, self.kernel, permute=False)
+            mmd2_permuted = np.array([linear_mmd2(x_ref, x, self.kernel, permute=True)
                                       for _ in range(self.n_permutations)])
             p_val = (mmd2 <= mmd2_permuted).mean()
         elif self.estimator == 'quad':
diff --git a/alibi_detect/utils/pytorch/distance.py b/alibi_detect/utils/pytorch/distance.py
index 6fc6f8fa6..3defbbbc8 100644
--- a/alibi_detect/utils/pytorch/distance.py
+++ b/alibi_detect/utils/pytorch/distance.py
@@ -118,7 +118,7 @@ def batch_compute_kernel_matrix(
 def linear_mmd2(x: torch.Tensor,
                 y: torch.Tensor,
                 kernel: Callable,
-                permute: bool = False) -> float:
+                permute: bool = False) -> torch.Tensor:
     """
     Compute maximum mean discrepancy (MMD^2) between 2 samples x and y with the
     linear-time estimator.
@@ -133,6 +133,9 @@ def linear_mmd2(x: torch.Tensor,
         Kernel function.
     permute
         Whether to permute the row indices. Used for permutation tests.
+    Returns
+    -------
+    MMD^2 between the samples.
     """
     n = np.shape(x)[0]
     m = np.shape(y)[0]
@@ -153,7 +156,7 @@ def linear_mmd2(x: torch.Tensor,
         k_yz = kernel(y_hat[0::2, :], x_hat[1::2, :], diag=True)
 
     h = k_xx + k_yy - k_xy - k_yz
-    return torch.sum(h) / (n / 2)
+    return h.sum() / (n / 2.)
 
 
 def mmd2_from_kernel_matrix(kernel_mat: torch.Tensor,

From f6b93d62dca773ca45464313996dd80602c72a27 Mon Sep 17 00:00:00 2001
From: Hao Song <nuacesh@gmail.com>
Date: Fri, 1 Apr 2022 16:32:18 +0100
Subject: [PATCH 05/14] minor fixes.

---
 alibi_detect/cd/pytorch/mmd.py         | 23 +++++++++++------------
 alibi_detect/cd/tensorflow/mmd.py      |  4 ++--
 alibi_detect/utils/pytorch/distance.py |  7 +++++--
 3 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/alibi_detect/cd/pytorch/mmd.py b/alibi_detect/cd/pytorch/mmd.py
index feb223cb1..13174416d 100644
--- a/alibi_detect/cd/pytorch/mmd.py
+++ b/alibi_detect/cd/pytorch/mmd.py
@@ -122,28 +122,27 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray]:
         and the MMD^2 values from the permutation test.
         """
         x_ref, x = self.preprocess(x)
-        x_ref = torch.from_numpy(x_ref).to(self.device)  # type: ignore[assignment]
-        x = torch.from_numpy(x).to(self.device)  # type: ignore[assignment]
-        # compute kernel matrix, MMD^2 and apply permutation test using the kernel matrix
         n = x.shape[0]
         m = x_ref.shape[0]
         if self.estimator == 'linear':
             n_hat = int(np.floor(min(n, m) / 2) * 2)
-            x_ref = x_ref[:n_hat, :]
-            x = x[:n_hat, :]
-            mmd2 = linear_mmd2(x_ref, x, self.kernel, permute=False)
-            mmd2_permuted = torch.tensor([linear_mmd2(x_ref, x, self.kernel, permute=True)
+            x_ref = torch.from_numpy(x_ref[:n_hat, :]).to(self.device)  # type: ignore[assignment]
+            x = torch.from_numpy(x[:n_hat, :]).to(self.device)  # type: ignore[assignment]
+            mmd2 = linear_mmd2(x_ref, x, self.kernel, permute=False)  # type: ignore[arg-type]
+            mmd2_permuted = torch.tensor([linear_mmd2(x_ref, x, self.kernel, permute=True)  # type: ignore[arg-type]
                                           for _ in range(self.n_permutations)])
-            p_val = (mmd2 <= mmd2_permuted).float().mean()
         elif self.estimator == 'quad':
+            x_ref = torch.from_numpy(x_ref).to(self.device)  # type: ignore[assignment]
+            x = torch.from_numpy(x).to(self.device)  # type: ignore[assignment]
+            # compute kernel matrix, MMD^2 and apply permutation test using the kernel matrix
             kernel_mat = self.kernel_matrix(x_ref, x)  # type: ignore[arg-type]
             kernel_mat = kernel_mat - torch.diag(kernel_mat.diag())  # zero diagonal
-            mmd2 = mmd2_from_kernel_matrix(kernel_mat, n, permute=False, zero_diag=False)
+            mmd2 = mmd2_from_kernel_matrix(kernel_mat, n, permute=False, zero_diag=False)  # type: ignore[assignment]
             mmd2_permuted = torch.Tensor(
                 [mmd2_from_kernel_matrix(kernel_mat, n, permute=True, zero_diag=False)
                  for _ in range(self.n_permutations)]
             )
-            if self.device.type == 'cuda':
-                mmd2, mmd2_permuted = mmd2.cpu(), mmd2_permuted.cpu()
-            p_val = (mmd2 <= mmd2_permuted).float().mean()
+        if self.device.type == 'cuda':
+            mmd2, mmd2_permuted = mmd2.cpu(), mmd2_permuted.cpu()
+        p_val = (mmd2 <= mmd2_permuted).float().mean()
         return p_val.numpy().item(), mmd2.numpy().item(), mmd2_permuted.numpy()
diff --git a/alibi_detect/cd/tensorflow/mmd.py b/alibi_detect/cd/tensorflow/mmd.py
index ff35f8c6e..fbe655040 100644
--- a/alibi_detect/cd/tensorflow/mmd.py
+++ b/alibi_detect/cd/tensorflow/mmd.py
@@ -116,8 +116,8 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray]:
             n_hat = int(np.floor(min(n, m) / 2) * 2)
             x_ref = x_ref[:n_hat, :]
             x = x[:n_hat, :]
-            mmd2 = linear_mmd2(x_ref, x, self.kernel, permute=False).numpy()
-            mmd2_permuted = np.array([linear_mmd2(x_ref, x, self.kernel, permute=True).numpy()
+            mmd2 = linear_mmd2(x_ref, x, self.kernel, permute=False)
+            mmd2_permuted = np.array([linear_mmd2(x_ref, x, self.kernel, permute=True)
                                       for _ in range(self.n_permutations)])
             p_val = (mmd2 <= mmd2_permuted).mean()
         elif self.estimator == 'quad':
diff --git a/alibi_detect/utils/pytorch/distance.py b/alibi_detect/utils/pytorch/distance.py
index 6fc6f8fa6..3defbbbc8 100644
--- a/alibi_detect/utils/pytorch/distance.py
+++ b/alibi_detect/utils/pytorch/distance.py
@@ -118,7 +118,7 @@ def batch_compute_kernel_matrix(
 def linear_mmd2(x: torch.Tensor,
                 y: torch.Tensor,
                 kernel: Callable,
-                permute: bool = False) -> float:
+                permute: bool = False) -> torch.Tensor:
     """
     Compute maximum mean discrepancy (MMD^2) between 2 samples x and y with the
     linear-time estimator.
@@ -133,6 +133,9 @@ def linear_mmd2(x: torch.Tensor,
         Kernel function.
     permute
         Whether to permute the row indices. Used for permutation tests.
+    Returns
+    -------
+    MMD^2 between the samples.
     """
     n = np.shape(x)[0]
     m = np.shape(y)[0]
@@ -153,7 +156,7 @@ def linear_mmd2(x: torch.Tensor,
         k_yz = kernel(y_hat[0::2, :], x_hat[1::2, :], diag=True)
 
     h = k_xx + k_yy - k_xy - k_yz
-    return torch.sum(h) / (n / 2)
+    return h.sum() / (n / 2.)
 
 
 def mmd2_from_kernel_matrix(kernel_mat: torch.Tensor,

From 7bacbec6d3f8228a6cb2af65028c980a4aec2653 Mon Sep 17 00:00:00 2001
From: Hao Song <nuacesh@gmail.com>
Date: Fri, 1 Apr 2022 17:48:18 +0100
Subject: [PATCH 06/14] mnior fixes

---
 alibi_detect/cd/pytorch/mmd.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/alibi_detect/cd/pytorch/mmd.py b/alibi_detect/cd/pytorch/mmd.py
index 26ad3d9be..13174416d 100644
--- a/alibi_detect/cd/pytorch/mmd.py
+++ b/alibi_detect/cd/pytorch/mmd.py
@@ -126,13 +126,8 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray]:
         m = x_ref.shape[0]
         if self.estimator == 'linear':
             n_hat = int(np.floor(min(n, m) / 2) * 2)
-<<<<<<< .merge_file_a03956
             x_ref = torch.from_numpy(x_ref[:n_hat, :]).to(self.device)  # type: ignore[assignment]
             x = torch.from_numpy(x[:n_hat, :]).to(self.device)  # type: ignore[assignment]
-=======
-            x_ref = torch.from_numpy(x_ref[:n_hat, :]).to(self.device) # type: ignore[assignment]
-            x = torch.from_numpy(x[:n_hat, :]).to(self.device) # type: ignore[assignment]
->>>>>>> .merge_file_a26104
             mmd2 = linear_mmd2(x_ref, x, self.kernel, permute=False)  # type: ignore[arg-type]
             mmd2_permuted = torch.tensor([linear_mmd2(x_ref, x, self.kernel, permute=True)  # type: ignore[arg-type]
                                           for _ in range(self.n_permutations)])

From 7507276627bf0af5bd905663dbf63d8962086480 Mon Sep 17 00:00:00 2001
From: Hao Song <nuacesh@gmail.com>
Date: Mon, 4 Apr 2022 14:46:12 +0100
Subject: [PATCH 07/14] Created separated class for linear-time estimator

---
 alibi_detect/cd/mmd.py                    |  20 ++-
 alibi_detect/cd/pytorch/mmd.py            | 150 +++++++++++++++++++---
 alibi_detect/cd/tensorflow/mmd.py         | 135 ++++++++++++++++---
 alibi_detect/utils/tensorflow/distance.py |   4 +-
 4 files changed, 264 insertions(+), 45 deletions(-)

diff --git a/alibi_detect/cd/mmd.py b/alibi_detect/cd/mmd.py
index 0ffabe166..c6de81438 100644
--- a/alibi_detect/cd/mmd.py
+++ b/alibi_detect/cd/mmd.py
@@ -4,10 +4,10 @@
 from alibi_detect.utils.frameworks import has_pytorch, has_tensorflow
 
 if has_pytorch:
-    from alibi_detect.cd.pytorch.mmd import MMDDriftTorch
+    from alibi_detect.cd.pytorch.mmd import MMDDriftTorch, LinearTimeDriftTorch
 
 if has_tensorflow:
-    from alibi_detect.cd.tensorflow.mmd import MMDDriftTF
+    from alibi_detect.cd.tensorflow.mmd import MMDDriftTF, LinearTimeMMDDriftTF
 
 logger = logging.getLogger(__name__)
 
@@ -79,7 +79,7 @@ def __init__(
 
         kwargs = locals()
         args = [kwargs['x_ref']]
-        pop_kwargs = ['self', 'x_ref', 'backend', '__class__']
+        pop_kwargs = ['self', 'x_ref', 'backend', '__class__', 'estimator']
         [kwargs.pop(k, None) for k in pop_kwargs]
 
         if kernel is None:
@@ -91,9 +91,19 @@ def __init__(
 
         if backend == 'tensorflow' and has_tensorflow:
             kwargs.pop('device', None)
-            self._detector = MMDDriftTF(*args, **kwargs)  # type: ignore
+            if estimator == 'quad':
+                self._detector = MMDDriftTF(*args, **kwargs)  # type: ignore
+            elif estimator == 'linear':
+                self._detector = LinearTimeMMDDriftTF(*args, **kwargs)  # type: ignore
+            else:
+                raise NotImplementedError(f'{estimator} not implemented. Use quad or linear instead.')
         else:
-            self._detector = MMDDriftTorch(*args, **kwargs)  # type: ignore
+            if estimator == 'quad':
+                self._detector = MMDDriftTorch(*args, **kwargs)  # type: ignore
+            elif estimator == 'linear':
+                self._detector = LinearTimeDriftTorch(*args, **kwargs)  # type: ignore
+            else:
+                raise NotImplementedError(f'{estimator} not implemented. Use quad or linear instead.')
         self.meta = self._detector.meta
 
     def predict(self, x: Union[np.ndarray, list], return_p_val: bool = True, return_distance: bool = True) \
diff --git a/alibi_detect/cd/pytorch/mmd.py b/alibi_detect/cd/pytorch/mmd.py
index 13174416d..092ef5d63 100644
--- a/alibi_detect/cd/pytorch/mmd.py
+++ b/alibi_detect/cd/pytorch/mmd.py
@@ -123,26 +123,140 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray]:
         """
         x_ref, x = self.preprocess(x)
         n = x.shape[0]
-        m = x_ref.shape[0]
-        if self.estimator == 'linear':
-            n_hat = int(np.floor(min(n, m) / 2) * 2)
-            x_ref = torch.from_numpy(x_ref[:n_hat, :]).to(self.device)  # type: ignore[assignment]
-            x = torch.from_numpy(x[:n_hat, :]).to(self.device)  # type: ignore[assignment]
-            mmd2 = linear_mmd2(x_ref, x, self.kernel, permute=False)  # type: ignore[arg-type]
-            mmd2_permuted = torch.tensor([linear_mmd2(x_ref, x, self.kernel, permute=True)  # type: ignore[arg-type]
-                                          for _ in range(self.n_permutations)])
-        elif self.estimator == 'quad':
-            x_ref = torch.from_numpy(x_ref).to(self.device)  # type: ignore[assignment]
-            x = torch.from_numpy(x).to(self.device)  # type: ignore[assignment]
-            # compute kernel matrix, MMD^2 and apply permutation test using the kernel matrix
-            kernel_mat = self.kernel_matrix(x_ref, x)  # type: ignore[arg-type]
-            kernel_mat = kernel_mat - torch.diag(kernel_mat.diag())  # zero diagonal
-            mmd2 = mmd2_from_kernel_matrix(kernel_mat, n, permute=False, zero_diag=False)  # type: ignore[assignment]
-            mmd2_permuted = torch.Tensor(
-                [mmd2_from_kernel_matrix(kernel_mat, n, permute=True, zero_diag=False)
-                 for _ in range(self.n_permutations)]
+        x_ref = torch.from_numpy(x_ref).to(self.device)  # type: ignore[assignment]
+        x = torch.from_numpy(x).to(self.device)  # type: ignore[assignment]
+        # compute kernel matrix, MMD^2 and apply permutation test using the kernel matrix
+        kernel_mat = self.kernel_matrix(x_ref, x)  # type: ignore[arg-type]
+        kernel_mat = kernel_mat - torch.diag(kernel_mat.diag())  # zero diagonal
+        mmd2 = mmd2_from_kernel_matrix(kernel_mat, n, permute=False, zero_diag=False)  # type: ignore[assignment]
+        mmd2_permuted = torch.Tensor(
+            [mmd2_from_kernel_matrix(kernel_mat, n, permute=True, zero_diag=False)
+             for _ in range(self.n_permutations)]
             )
         if self.device.type == 'cuda':
             mmd2, mmd2_permuted = mmd2.cpu(), mmd2_permuted.cpu()
         p_val = (mmd2 <= mmd2_permuted).float().mean()
         return p_val.numpy().item(), mmd2.numpy().item(), mmd2_permuted.numpy()
+
+
+class LinearTimeDriftTorch(BaseMMDDrift):
+    def __init__(
+            self,
+            x_ref: Union[np.ndarray, list],
+            p_val: float = .05,
+            preprocess_x_ref: bool = True,
+            update_x_ref: Optional[Dict[str, int]] = None,
+            preprocess_fn: Optional[Callable] = None,
+            kernel: Callable = GaussianRBF,
+            sigma: Optional[np.ndarray] = None,
+            configure_kernel_from_x_ref: bool = True,
+            n_permutations: int = 100,
+            device: Optional[str] = None,
+            input_shape: Optional[tuple] = None,
+            data_type: Optional[str] = None
+    ) -> None:
+        """
+        Maximum Mean Discrepancy (MMD) data drift detector using a permutation test, with linear-time estimator.
+
+        Parameters
+        ----------
+        x_ref
+            Data used as reference distribution.
+        p_val
+            p-value used for the significance of the permutation test.
+        preprocess_x_ref
+            Whether to already preprocess and store the reference data.
+        update_x_ref
+            Reference data can optionally be updated to the last n instances seen by the detector
+            or via reservoir sampling with size n. For the former, the parameter equals {'last': n} while
+            for reservoir sampling {'reservoir_sampling': n} is passed.
+        preprocess_fn
+            Function to preprocess the data before computing the data drift metrics.
+        kernel
+            Kernel used for the MMD computation, defaults to Gaussian RBF kernel.
+        sigma
+            Optionally set the GaussianRBF kernel bandwidth. Can also pass multiple bandwidth values as an array.
+            The kernel evaluation is then averaged over those bandwidths.
+        configure_kernel_from_x_ref
+            Whether to already configure the kernel bandwidth from the reference data.
+        n_permutations
+            Number of permutations used in the permutation test.
+        device
+            Device type used. The default None tries to use the GPU and falls back on CPU if needed.
+            Can be specified by passing either 'cuda', 'gpu' or 'cpu'.
+        input_shape
+            Shape of input data.
+        data_type
+            Optionally specify the data type (tabular, image or time-series). Added to metadata.
+        """
+        super().__init__(
+            x_ref=x_ref,
+            p_val=p_val,
+            preprocess_x_ref=preprocess_x_ref,
+            update_x_ref=update_x_ref,
+            preprocess_fn=preprocess_fn,
+            sigma=sigma,
+            configure_kernel_from_x_ref=configure_kernel_from_x_ref,
+            n_permutations=n_permutations,
+            input_shape=input_shape,
+            data_type=data_type
+        )
+        self.meta.update({'backend': 'pytorch'})
+
+        # set backend
+        if device is None or device.lower() in ['gpu', 'cuda']:
+            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+            if self.device.type == 'cpu':
+                print('No GPU detected, fall back on CPU.')
+        else:
+            self.device = torch.device('cpu')
+
+        # initialize kernel
+        sigma = torch.from_numpy(sigma).to(self.device) if isinstance(sigma,  # type: ignore[assignment]
+                                                                      np.ndarray) else None
+        self.kernel = kernel(sigma) if kernel == GaussianRBF else kernel
+
+        # compute kernel matrix for the reference data
+        if self.infer_sigma or isinstance(sigma, torch.Tensor):
+            x = torch.from_numpy(self.x_ref).to(self.device)
+            self.k_xx = self.kernel(x, x, infer_sigma=self.infer_sigma)
+            self.infer_sigma = False
+        else:
+            self.k_xx, self.infer_sigma = None, True
+
+    def kernel_matrix(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+        """ Compute and return full kernel matrix between arrays x and y. """
+        k_xy = self.kernel(x, y, self.infer_sigma)
+        k_xx = self.k_xx if self.k_xx is not None and self.update_x_ref is None else self.kernel(x, x)
+        k_yy = self.kernel(y, y)
+        kernel_mat = torch.cat([torch.cat([k_xx, k_xy], 1), torch.cat([k_xy.T, k_yy], 1)], 0)
+        return kernel_mat
+
+    def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray]:
+        """
+        Compute the p-value resulting from a permutation test using the maximum mean discrepancy
+        as a distance measure between the reference data and the data to be tested.
+
+        Parameters
+        ----------
+        x
+            Batch of instances.
+
+        Returns
+        -------
+        p-value obtained from the permutation test, the MMD^2 between the reference and test set
+        and the MMD^2 values from the permutation test.
+        """
+        x_ref, x = self.preprocess(x)
+        n = x.shape[0]
+        m = x_ref.shape[0]
+        n_hat = int(np.floor(min(n, m) / 2) * 2)
+        x_ref = torch.from_numpy(x_ref[:n_hat, :]).to(self.device)  # type: ignore[assignment]
+        x = torch.from_numpy(x[:n_hat, :]).to(self.device)  # type: ignore[assignment]
+        mmd2 = linear_mmd2(x_ref, x, self.kernel, permute=False)  # type: ignore[arg-type]
+        mmd2_permuted = torch.tensor([linear_mmd2(x_ref, x, self.kernel, permute=True)  # type: ignore[arg-type]
+                                      for _ in range(self.n_permutations)])
+        if self.device.type == 'cuda':
+            mmd2, mmd2_permuted = mmd2.cpu(), mmd2_permuted.cpu()
+        p_val = (mmd2 <= mmd2_permuted).float().mean()
+        return p_val.numpy().item(), mmd2.numpy().item(), mmd2_permuted.numpy()
diff --git a/alibi_detect/cd/tensorflow/mmd.py b/alibi_detect/cd/tensorflow/mmd.py
index fbe655040..e1138fee1 100644
--- a/alibi_detect/cd/tensorflow/mmd.py
+++ b/alibi_detect/cd/tensorflow/mmd.py
@@ -14,7 +14,6 @@ def __init__(
             self,
             x_ref: Union[np.ndarray, list],
             p_val: float = .05,
-            estimator: str = 'quad',
             preprocess_x_ref: bool = True,
             update_x_ref: Optional[Dict[str, int]] = None,
             preprocess_fn: Optional[Callable] = None,
@@ -34,8 +33,6 @@ def __init__(
             Data used as reference distribution.
         p_val
             p-value used for the significance of the permutation test.
-        estimator
-            Estimator used for the MMD^2 computation {'quad', 'linear'}.
         preprocess_x_ref
             Whether to already preprocess and store the reference data.
         update_x_ref
@@ -61,7 +58,114 @@ def __init__(
         super().__init__(
             x_ref=x_ref,
             p_val=p_val,
-            estimator=estimator,
+            preprocess_x_ref=preprocess_x_ref,
+            update_x_ref=update_x_ref,
+            preprocess_fn=preprocess_fn,
+            sigma=sigma,
+            configure_kernel_from_x_ref=configure_kernel_from_x_ref,
+            n_permutations=n_permutations,
+            input_shape=input_shape,
+            data_type=data_type
+        )
+        self.meta.update({'backend': 'tensorflow'})
+
+        # initialize kernel
+        if isinstance(sigma, np.ndarray):
+            sigma = tf.convert_to_tensor(sigma)
+        self.kernel = kernel(sigma) if kernel == GaussianRBF else kernel
+
+        # compute kernel matrix for the reference data
+        if self.infer_sigma or isinstance(sigma, tf.Tensor):
+            self.k_xx = self.kernel(self.x_ref, self.x_ref, infer_sigma=self.infer_sigma)
+            self.infer_sigma = False
+        else:
+            self.k_xx, self.infer_sigma = None, True
+
+    def kernel_matrix(self, x: Union[np.ndarray, tf.Tensor], y: Union[np.ndarray, tf.Tensor]) -> tf.Tensor:
+        """ Compute and return full kernel matrix between arrays x and y. """
+        k_xy = self.kernel(x, y, self.infer_sigma)
+        k_xx = self.k_xx if self.k_xx is not None and self.update_x_ref is None else self.kernel(x, x)
+        k_yy = self.kernel(y, y)
+        kernel_mat = tf.concat([tf.concat([k_xx, k_xy], 1), tf.concat([tf.transpose(k_xy, (1, 0)), k_yy], 1)], 0)
+        return kernel_mat
+
+    def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray]:
+        """
+        Compute the p-value resulting from a permutation test using the maximum mean discrepancy
+        as a distance measure between the reference data and the data to be tested.
+
+        Parameters
+        ----------
+        x
+            Batch of instances.
+
+        Returns
+        -------
+        p-value obtained from the permutation test, the MMD^2 between the reference and test set
+        and the MMD^2 values from the permutation test.
+        """
+        x_ref, x = self.preprocess(x)
+        # compute kernel matrix, MMD^2 and apply permutation test using the kernel matrix
+        n = x.shape[0]
+        kernel_mat = self.kernel_matrix(x_ref, x)
+        kernel_mat = kernel_mat - tf.linalg.diag(tf.linalg.diag_part(kernel_mat))  # zero diagonal
+        mmd2 = mmd2_from_kernel_matrix(kernel_mat, n, permute=False, zero_diag=False).numpy()
+        mmd2_permuted = np.array(
+            [mmd2_from_kernel_matrix(kernel_mat, n, permute=True, zero_diag=False).numpy()
+             for _ in range(self.n_permutations)])
+        p_val = (mmd2 <= mmd2_permuted).mean()
+        return p_val, mmd2, mmd2_permuted
+
+
+class LinearTimeMMDDriftTF(BaseMMDDrift):
+    def __init__(
+            self,
+            x_ref: Union[np.ndarray, list],
+            p_val: float = .05,
+            preprocess_x_ref: bool = True,
+            update_x_ref: Optional[Dict[str, int]] = None,
+            preprocess_fn: Optional[Callable] = None,
+            kernel: Callable = GaussianRBF,
+            sigma: Optional[np.ndarray] = None,
+            configure_kernel_from_x_ref: bool = True,
+            n_permutations: int = 100,
+            input_shape: Optional[tuple] = None,
+            data_type: Optional[str] = None
+    ) -> None:
+        """
+        Maximum Mean Discrepancy (MMD) data drift detector using a permutation test, with linear-time estimator.
+
+        Parameters
+        ----------
+        x_ref
+            Data used as reference distribution.
+        p_val
+            p-value used for the significance of the permutation test.
+        preprocess_x_ref
+            Whether to already preprocess and store the reference data.
+        update_x_ref
+            Reference data can optionally be updated to the last n instances seen by the detector
+            or via reservoir sampling with size n. For the former, the parameter equals {'last': n} while
+            for reservoir sampling {'reservoir_sampling': n} is passed.
+        preprocess_fn
+            Function to preprocess the data before computing the data drift metrics.
+        kernel
+            Kernel used for the MMD computation, defaults to Gaussian RBF kernel.
+        sigma
+            Optionally set the GaussianRBF kernel bandwidth. Can also pass multiple bandwidth values as an array.
+            The kernel evaluation is then averaged over those bandwidths.
+        configure_kernel_from_x_ref
+            Whether to already configure the kernel bandwidth from the reference data.
+        n_permutations
+            Number of permutations used in the permutation test.
+        input_shape
+            Shape of input data.
+        data_type
+            Optionally specify the data type (tabular, image or time-series). Added to metadata.
+        """
+        super().__init__(
+            x_ref=x_ref,
+            p_val=p_val,
             preprocess_x_ref=preprocess_x_ref,
             update_x_ref=update_x_ref,
             preprocess_fn=preprocess_fn,
@@ -112,20 +216,11 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray]:
         # compute kernel matrix, MMD^2 and apply permutation test using the kernel matrix
         n = x.shape[0]
         m = x_ref.shape[0]
-        if self.estimator == 'linear':
-            n_hat = int(np.floor(min(n, m) / 2) * 2)
-            x_ref = x_ref[:n_hat, :]
-            x = x[:n_hat, :]
-            mmd2 = linear_mmd2(x_ref, x, self.kernel, permute=False)
-            mmd2_permuted = np.array([linear_mmd2(x_ref, x, self.kernel, permute=True)
-                                      for _ in range(self.n_permutations)])
-            p_val = (mmd2 <= mmd2_permuted).mean()
-        elif self.estimator == 'quad':
-            kernel_mat = self.kernel_matrix(x_ref, x)
-            kernel_mat = kernel_mat - tf.linalg.diag(tf.linalg.diag_part(kernel_mat))  # zero diagonal
-            mmd2 = mmd2_from_kernel_matrix(kernel_mat, n, permute=False, zero_diag=False).numpy()
-            mmd2_permuted = np.array(
-                [mmd2_from_kernel_matrix(kernel_mat, n, permute=True, zero_diag=False).numpy()
-                 for _ in range(self.n_permutations)])
-            p_val = (mmd2 <= mmd2_permuted).mean()
+        n_hat = int(np.floor(min(n, m) / 2) * 2)
+        x_ref = x_ref[:n_hat, :]
+        x = x[:n_hat, :]
+        mmd2 = linear_mmd2(x_ref, x, self.kernel, permute=False).numpy()
+        mmd2_permuted = np.array([linear_mmd2(x_ref, x, self.kernel, permute=True).numpy()
+                                  for _ in range(self.n_permutations)])
+        p_val = (mmd2 <= mmd2_permuted).mean()
         return p_val, mmd2, mmd2_permuted
diff --git a/alibi_detect/utils/tensorflow/distance.py b/alibi_detect/utils/tensorflow/distance.py
index 133cc3e65..305c7074e 100644
--- a/alibi_detect/utils/tensorflow/distance.py
+++ b/alibi_detect/utils/tensorflow/distance.py
@@ -112,7 +112,7 @@ def batch_compute_kernel_matrix(
 def linear_mmd2(x: tf.Tensor,
                 y: tf.Tensor,
                 kernel: Callable,
-                permute: bool = False) -> float:
+                permute: bool = False) -> tf.Tensor:
     """
     Compute maximum mean discrepancy (MMD^2) between 2 samples x and y with the
     linear-time estimator.
@@ -147,7 +147,7 @@ def linear_mmd2(x: tf.Tensor,
         k_yz = kernel(y_hat[0::2, :], x_hat[1::2, :], diag=True)
 
     h = k_xx + k_yy - k_xy - k_yz
-    return tf.reduce_sum(h) / (n / 2)
+    return (tf.reduce_sum(h) / (n / 2.))
 
 
 def mmd2_from_kernel_matrix(kernel_mat: tf.Tensor,

From 29cd155194651fb26e7c57ab675e632602486edf Mon Sep 17 00:00:00 2001
From: Hao Song <nuacesh@gmail.com>
Date: Wed, 6 Apr 2022 14:04:20 +0100
Subject: [PATCH 08/14] Use Gaussian asymptotic distribution under null to
 calculate test threshold with the linear-time estimator, instead of
 permutation.

---
 alibi_detect/cd/base.py                   | 16 +++++++------
 alibi_detect/cd/mmd.py                    |  2 ++
 alibi_detect/cd/pytorch/mmd.py            | 19 +++++++--------
 alibi_detect/cd/tensorflow/mmd.py         | 15 ++++++------
 alibi_detect/utils/pytorch/distance.py    | 29 +++++++----------------
 alibi_detect/utils/tensorflow/distance.py |  6 +++--
 6 files changed, 41 insertions(+), 46 deletions(-)

diff --git a/alibi_detect/cd/base.py b/alibi_detect/cd/base.py
index 9f391a48d..a584ed925 100644
--- a/alibi_detect/cd/base.py
+++ b/alibi_detect/cd/base.py
@@ -452,7 +452,6 @@ def __init__(
             self,
             x_ref: Union[np.ndarray, list],
             p_val: float = .05,
-            estimator: str = 'quad',
             preprocess_x_ref: bool = True,
             update_x_ref: Optional[Dict[str, int]] = None,
             preprocess_fn: Optional[Callable] = None,
@@ -505,7 +504,6 @@ def __init__(
 
         # optionally already preprocess reference data
         self.p_val = p_val
-        self.estimator = estimator
         if preprocess_x_ref and isinstance(preprocess_fn, Callable):  # type: ignore[arg-type]
             self.x_ref = preprocess_fn(x_ref)
         else:
@@ -572,12 +570,16 @@ def predict(self, x: Union[np.ndarray, list], return_p_val: bool = True, return_
         'data' contains the drift prediction and optionally the p-value, threshold and MMD metric.
         """
         # compute drift scores
-        p_val, dist, dist_permutations = self.score(x)
-        drift_pred = int(p_val < self.p_val)
+        p_val, dist, tmp_v = self.score(x)
+        if len(np.shape(tmp_v)) > 0:
+            dist_permutations = tmp_v
+            # compute distance threshold
+            idx_threshold = int(self.p_val * len(dist_permutations))
+            distance_threshold = np.sort(dist_permutations)[::-1][idx_threshold]
+        else:
+            distance_threshold = tmp_v
 
-        # compute distance threshold
-        idx_threshold = int(self.p_val * len(dist_permutations))
-        distance_threshold = np.sort(dist_permutations)[::-1][idx_threshold]
+        drift_pred = int(p_val < self.p_val)
 
         # update reference dataset
         if isinstance(self.update_x_ref, dict) and self.preprocess_fn is not None and self.preprocess_x_ref:
diff --git a/alibi_detect/cd/mmd.py b/alibi_detect/cd/mmd.py
index c6de81438..5673a7648 100644
--- a/alibi_detect/cd/mmd.py
+++ b/alibi_detect/cd/mmd.py
@@ -94,6 +94,7 @@ def __init__(
             if estimator == 'quad':
                 self._detector = MMDDriftTF(*args, **kwargs)  # type: ignore
             elif estimator == 'linear':
+                kwargs.pop('n_permutations', None)
                 self._detector = LinearTimeMMDDriftTF(*args, **kwargs)  # type: ignore
             else:
                 raise NotImplementedError(f'{estimator} not implemented. Use quad or linear instead.')
@@ -101,6 +102,7 @@ def __init__(
             if estimator == 'quad':
                 self._detector = MMDDriftTorch(*args, **kwargs)  # type: ignore
             elif estimator == 'linear':
+                kwargs.pop('n_permutations', None)
                 self._detector = LinearTimeDriftTorch(*args, **kwargs)  # type: ignore
             else:
                 raise NotImplementedError(f'{estimator} not implemented. Use quad or linear instead.')
diff --git a/alibi_detect/cd/pytorch/mmd.py b/alibi_detect/cd/pytorch/mmd.py
index 092ef5d63..8b004c4f6 100644
--- a/alibi_detect/cd/pytorch/mmd.py
+++ b/alibi_detect/cd/pytorch/mmd.py
@@ -1,5 +1,6 @@
 import logging
 import numpy as np
+import scipy.stats as stats
 import torch
 from typing import Callable, Dict, Optional, Tuple, Union
 from alibi_detect.cd.base import BaseMMDDrift
@@ -14,7 +15,6 @@ def __init__(
             self,
             x_ref: Union[np.ndarray, list],
             p_val: float = .05,
-            estimator: Optional[str] = 'quad',
             preprocess_x_ref: bool = True,
             update_x_ref: Optional[Dict[str, int]] = None,
             preprocess_fn: Optional[Callable] = None,
@@ -35,8 +35,6 @@ def __init__(
             Data used as reference distribution.
         p_val
             p-value used for the significance of the permutation test.
-        estimator
-            Estimator used for the MMD^2 computation {'quad', 'linear'}.
         preprocess_x_ref
             Whether to already preprocess and store the reference data.
         update_x_ref
@@ -65,7 +63,6 @@ def __init__(
         super().__init__(
             x_ref=x_ref,
             p_val=p_val,
-            estimator=estimator,
             preprocess_x_ref=preprocess_x_ref,
             update_x_ref=update_x_ref,
             preprocess_fn=preprocess_fn,
@@ -253,10 +250,12 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray]:
         n_hat = int(np.floor(min(n, m) / 2) * 2)
         x_ref = torch.from_numpy(x_ref[:n_hat, :]).to(self.device)  # type: ignore[assignment]
         x = torch.from_numpy(x[:n_hat, :]).to(self.device)  # type: ignore[assignment]
-        mmd2 = linear_mmd2(x_ref, x, self.kernel, permute=False)  # type: ignore[arg-type]
-        mmd2_permuted = torch.tensor([linear_mmd2(x_ref, x, self.kernel, permute=True)  # type: ignore[arg-type]
-                                      for _ in range(self.n_permutations)])
+        mmd2, var_mmd2 = linear_mmd2(x_ref, x, self.kernel)  # type: ignore[arg-type]
         if self.device.type == 'cuda':
-            mmd2, mmd2_permuted = mmd2.cpu(), mmd2_permuted.cpu()
-        p_val = (mmd2 <= mmd2_permuted).float().mean()
-        return p_val.numpy().item(), mmd2.numpy().item(), mmd2_permuted.numpy()
+            mmd2 = mmd2.cpu()
+        mmd2 = mmd2.numpy().item()
+        var_mmd2 = var_mmd2.numpy().item()
+        std_mmd2 = np.sqrt(var_mmd2)
+        p_val = 1 - stats.norm.cdf(mmd2 * np.sqrt(n_hat), loc=0., scale=std_mmd2*np.sqrt(2))
+        distance_threshold = stats.norm.ppf(1 - self.p_val, loc=0., scale=std_mmd2*np.sqrt(2))
+        return p_val, mmd2 * np.sqrt(n_hat), distance_threshold
diff --git a/alibi_detect/cd/tensorflow/mmd.py b/alibi_detect/cd/tensorflow/mmd.py
index e1138fee1..0475d7e10 100644
--- a/alibi_detect/cd/tensorflow/mmd.py
+++ b/alibi_detect/cd/tensorflow/mmd.py
@@ -1,5 +1,6 @@
 import logging
 import numpy as np
+import scipy.stats as stats
 import tensorflow as tf
 from typing import Callable, Dict, Optional, Tuple, Union
 from alibi_detect.cd.base import BaseMMDDrift
@@ -128,7 +129,6 @@ def __init__(
             kernel: Callable = GaussianRBF,
             sigma: Optional[np.ndarray] = None,
             configure_kernel_from_x_ref: bool = True,
-            n_permutations: int = 100,
             input_shape: Optional[tuple] = None,
             data_type: Optional[str] = None
     ) -> None:
@@ -171,7 +171,6 @@ def __init__(
             preprocess_fn=preprocess_fn,
             sigma=sigma,
             configure_kernel_from_x_ref=configure_kernel_from_x_ref,
-            n_permutations=n_permutations,
             input_shape=input_shape,
             data_type=data_type
         )
@@ -219,8 +218,10 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray]:
         n_hat = int(np.floor(min(n, m) / 2) * 2)
         x_ref = x_ref[:n_hat, :]
         x = x[:n_hat, :]
-        mmd2 = linear_mmd2(x_ref, x, self.kernel, permute=False).numpy()
-        mmd2_permuted = np.array([linear_mmd2(x_ref, x, self.kernel, permute=True).numpy()
-                                  for _ in range(self.n_permutations)])
-        p_val = (mmd2 <= mmd2_permuted).mean()
-        return p_val, mmd2, mmd2_permuted
+        mmd2, var_mmd2 = linear_mmd2(x_ref, x, self.kernel, permute=False)
+        mmd2 = mmd2.numpy()
+        var_mmd2 = var_mmd2.numpy()
+        std_mmd2 = np.sqrt(var_mmd2)
+        p_val = 1 - stats.norm.cdf(mmd2 * np.sqrt(n_hat), loc=0., scale=std_mmd2*np.sqrt(2))
+        distance_threshold = stats.norm.ppf(1 - self.p_val, loc=0., scale=std_mmd2*np.sqrt(2))
+        return p_val, mmd2 * np.sqrt(n_hat), distance_threshold
diff --git a/alibi_detect/utils/pytorch/distance.py b/alibi_detect/utils/pytorch/distance.py
index 3defbbbc8..5db36b1e4 100644
--- a/alibi_detect/utils/pytorch/distance.py
+++ b/alibi_detect/utils/pytorch/distance.py
@@ -117,8 +117,7 @@ def batch_compute_kernel_matrix(
 
 def linear_mmd2(x: torch.Tensor,
                 y: torch.Tensor,
-                kernel: Callable,
-                permute: bool = False) -> torch.Tensor:
+                kernel: Callable) -> Tuple[torch.Tensor, torch.Tensor]:
     """
     Compute maximum mean discrepancy (MMD^2) between 2 samples x and y with the
     linear-time estimator.
@@ -131,8 +130,6 @@ def linear_mmd2(x: torch.Tensor,
         Batch of instances of shape [Ny, features].
     kernel
         Kernel function.
-    permute
-        Whether to permute the row indices. Used for permutation tests.
     Returns
     -------
     MMD^2 between the samples.
@@ -141,29 +138,21 @@ def linear_mmd2(x: torch.Tensor,
     m = np.shape(y)[0]
     if n != m:
         raise RuntimeError("Linear-time estimator requires equal size samples")
-    if not permute:
-        k_xx = kernel(x=x[0::2, :], y=x[1::2, :], diag=True)
-        k_yy = kernel(x=y[0::2, :], y=y[1::2, :], diag=True)
-        k_xy = kernel(x=x[0::2, :], y=y[1::2, :], diag=True)
-        k_yz = kernel(x=y[0::2, :], y=x[1::2, :], diag=True)
-    else:
-        idx = np.random.permutation(m + n)
-        xy = torch.cat([x, y], dim=0)[idx, :]
-        x_hat, y_hat = xy[:n, :], xy[n:, :]
-        k_xx = kernel(x_hat[0::2, :], x_hat[1::2, :], diag=True)
-        k_yy = kernel(y_hat[0::2, :], y_hat[1::2, :], diag=True)
-        k_xy = kernel(x_hat[0::2, :], y_hat[1::2, :], diag=True)
-        k_yz = kernel(y_hat[0::2, :], x_hat[1::2, :], diag=True)
+    k_xx = kernel(x=x[0::2, :], y=x[1::2, :], diag=True)
+    k_yy = kernel(x=y[0::2, :], y=y[1::2, :], diag=True)
+    k_xy = kernel(x=x[0::2, :], y=y[1::2, :], diag=True)
+    k_yz = kernel(x=y[0::2, :], y=x[1::2, :], diag=True)
 
     h = k_xx + k_yy - k_xy - k_yz
-    return h.sum() / (n / 2.)
+    mmd2 = h.sum() / (n / 2.)
+    var_mmd2 = ((h * h).sum() / (n / 2.)) - (mmd2 ** 2)
+    return mmd2, var_mmd2
 
 
 def mmd2_from_kernel_matrix(kernel_mat: torch.Tensor,
                             m: int,
                             permute: bool = False,
-                            zero_diag: bool = True,
-                            estimator: str = 'quad') -> torch.Tensor:
+                            zero_diag: bool = True) -> torch.Tensor:
     """
     Compute maximum mean discrepancy (MMD^2) between 2 samples x and y from the
     full kernel matrix between the samples.
diff --git a/alibi_detect/utils/tensorflow/distance.py b/alibi_detect/utils/tensorflow/distance.py
index 305c7074e..306ef1bbe 100644
--- a/alibi_detect/utils/tensorflow/distance.py
+++ b/alibi_detect/utils/tensorflow/distance.py
@@ -112,7 +112,7 @@ def batch_compute_kernel_matrix(
 def linear_mmd2(x: tf.Tensor,
                 y: tf.Tensor,
                 kernel: Callable,
-                permute: bool = False) -> tf.Tensor:
+                permute: bool = False) -> Tuple[tf.Tensor, tf.Tensor]:
     """
     Compute maximum mean discrepancy (MMD^2) between 2 samples x and y with the
     linear-time estimator.
@@ -147,7 +147,9 @@ def linear_mmd2(x: tf.Tensor,
         k_yz = kernel(y_hat[0::2, :], x_hat[1::2, :], diag=True)
 
     h = k_xx + k_yy - k_xy - k_yz
-    return (tf.reduce_sum(h) / (n / 2.))
+    mmd2 = (tf.reduce_sum(h) / (n / 2.))
+    var_mmd2 = (tf.reduce_sum(h ** 2) / (n / 2.)) - (mmd2 ** 2)
+    return mmd2, var_mmd2
 
 
 def mmd2_from_kernel_matrix(kernel_mat: tf.Tensor,

From eef8defa2bf391eb44da9aa38c3aa0a4ecc4f48d Mon Sep 17 00:00:00 2001
From: Hao Song <nuacesh@gmail.com>
Date: Tue, 19 Apr 2022 12:51:31 +0100
Subject: [PATCH 09/14] Fixes according to PR review.

---
 alibi_detect/cd/mmd.py                    |  5 ++-
 alibi_detect/cd/pytorch/mmd.py            |  7 ++--
 alibi_detect/cd/tensorflow/mmd.py         |  9 ++---
 alibi_detect/utils/pytorch/distance.py    | 30 ++++++++++-------
 alibi_detect/utils/pytorch/kernels.py     | 11 +++---
 alibi_detect/utils/tensorflow/distance.py | 41 ++++++++++-------------
 alibi_detect/utils/tensorflow/kernels.py  | 11 +++---
 7 files changed, 57 insertions(+), 57 deletions(-)

diff --git a/alibi_detect/cd/mmd.py b/alibi_detect/cd/mmd.py
index 5673a7648..e376c1597 100644
--- a/alibi_detect/cd/mmd.py
+++ b/alibi_detect/cd/mmd.py
@@ -42,7 +42,10 @@ def __init__(
         p_val
             p-value used for the significance of the permutation test.
         estimator
-            Estimator used for the MMD^2 computation {'quad', 'linear'}.
+            Estimator used for the MMD^2 computation {'quad', 'linear'}. 'Quad' is the default and
+            uses the quadratic u-statistics on each square kernel matrix. 'Linear' uses the linear
+            time estimator as in Gretton et al. (2014), and the threshold is computed using the Gaussian
+            asympotic distribution under null.
         preprocess_x_ref
             Whether to already preprocess and store the reference data.
         update_x_ref
diff --git a/alibi_detect/cd/pytorch/mmd.py b/alibi_detect/cd/pytorch/mmd.py
index 8b004c4f6..024ba41c9 100644
--- a/alibi_detect/cd/pytorch/mmd.py
+++ b/alibi_detect/cd/pytorch/mmd.py
@@ -256,6 +256,7 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray]:
         mmd2 = mmd2.numpy().item()
         var_mmd2 = var_mmd2.numpy().item()
         std_mmd2 = np.sqrt(var_mmd2)
-        p_val = 1 - stats.norm.cdf(mmd2 * np.sqrt(n_hat), loc=0., scale=std_mmd2*np.sqrt(2))
-        distance_threshold = stats.norm.ppf(1 - self.p_val, loc=0., scale=std_mmd2*np.sqrt(2))
-        return p_val, mmd2 * np.sqrt(n_hat), distance_threshold
+        t = mmd2 / (std_mmd2 / np.sqrt(n_hat / 2.))
+        p_val = 1 - stats.t.cdf(t, df=(n_hat / 2.) - 1)
+        distance_threshold = stats.t.ppf(1 - self.p_val, df=(n_hat / 2.) - 1)
+        return p_val, t, distance_threshold
diff --git a/alibi_detect/cd/tensorflow/mmd.py b/alibi_detect/cd/tensorflow/mmd.py
index 0475d7e10..cfdaba024 100644
--- a/alibi_detect/cd/tensorflow/mmd.py
+++ b/alibi_detect/cd/tensorflow/mmd.py
@@ -218,10 +218,11 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray]:
         n_hat = int(np.floor(min(n, m) / 2) * 2)
         x_ref = x_ref[:n_hat, :]
         x = x[:n_hat, :]
-        mmd2, var_mmd2 = linear_mmd2(x_ref, x, self.kernel, permute=False)
+        mmd2, var_mmd2 = linear_mmd2(x_ref, x, self.kernel)
         mmd2 = mmd2.numpy()
         var_mmd2 = var_mmd2.numpy()
         std_mmd2 = np.sqrt(var_mmd2)
-        p_val = 1 - stats.norm.cdf(mmd2 * np.sqrt(n_hat), loc=0., scale=std_mmd2*np.sqrt(2))
-        distance_threshold = stats.norm.ppf(1 - self.p_val, loc=0., scale=std_mmd2*np.sqrt(2))
-        return p_val, mmd2 * np.sqrt(n_hat), distance_threshold
+        t = mmd2 / (std_mmd2 / np.sqrt(n_hat / 2.))
+        p_val = 1 - stats.t.cdf(t, df=(n_hat / 2.) - 1)
+        distance_threshold = stats.t.ppf(1 - self.p_val, df=(n_hat / 2.) - 1)
+        return p_val, t, distance_threshold
diff --git a/alibi_detect/utils/pytorch/distance.py b/alibi_detect/utils/pytorch/distance.py
index 5db36b1e4..e26f6f380 100644
--- a/alibi_detect/utils/pytorch/distance.py
+++ b/alibi_detect/utils/pytorch/distance.py
@@ -115,9 +115,11 @@ def batch_compute_kernel_matrix(
     return k_mat
 
 
-def linear_mmd2(x: torch.Tensor,
-                y: torch.Tensor,
-                kernel: Callable) -> Tuple[torch.Tensor, torch.Tensor]:
+def linear_mmd2(
+    x: torch.Tensor,
+    y: torch.Tensor,
+    kernel: Callable
+) -> Tuple[torch.Tensor, torch.Tensor]:
     """
     Compute maximum mean discrepancy (MMD^2) between 2 samples x and y with the
     linear-time estimator.
@@ -138,21 +140,23 @@ def linear_mmd2(x: torch.Tensor,
     m = np.shape(y)[0]
     if n != m:
         raise RuntimeError("Linear-time estimator requires equal size samples")
-    k_xx = kernel(x=x[0::2, :], y=x[1::2, :], diag=True)
-    k_yy = kernel(x=y[0::2, :], y=y[1::2, :], diag=True)
-    k_xy = kernel(x=x[0::2, :], y=y[1::2, :], diag=True)
-    k_yz = kernel(x=y[0::2, :], y=x[1::2, :], diag=True)
+    k_xx = kernel(x=x[0::2, :], y=x[1::2, :], pairwise=False)
+    k_yy = kernel(x=y[0::2, :], y=y[1::2, :], pairwise=False)
+    k_xy = kernel(x=x[0::2, :], y=y[1::2, :], pairwise=False)
+    k_yz = kernel(x=y[0::2, :], y=x[1::2, :], pairwise=False)
 
     h = k_xx + k_yy - k_xy - k_yz
-    mmd2 = h.sum() / (n / 2.)
-    var_mmd2 = ((h * h).sum() / (n / 2.)) - (mmd2 ** 2)
+    mmd2 = h.mean()
+    var_mmd2 = torch.var(h, unbiased=True)
     return mmd2, var_mmd2
 
 
-def mmd2_from_kernel_matrix(kernel_mat: torch.Tensor,
-                            m: int,
-                            permute: bool = False,
-                            zero_diag: bool = True) -> torch.Tensor:
+def mmd2_from_kernel_matrix(
+    kernel_mat: torch.Tensor,
+    m: int,
+    permute: bool = False,
+    zero_diag: bool = True
+) -> torch.Tensor:
     """
     Compute maximum mean discrepancy (MMD^2) between 2 samples x and y from the
     full kernel matrix between the samples.
diff --git a/alibi_detect/utils/pytorch/kernels.py b/alibi_detect/utils/pytorch/kernels.py
index 2a4327e14..0e4390976 100644
--- a/alibi_detect/utils/pytorch/kernels.py
+++ b/alibi_detect/utils/pytorch/kernels.py
@@ -71,10 +71,10 @@ def sigma(self) -> torch.Tensor:
     def forward(self, x: Union[np.ndarray, torch.Tensor],
                 y: Union[np.ndarray, torch.Tensor],
                 infer_sigma: bool = False,
-                diag: bool = False) -> torch.Tensor:
+                pairwise: bool = True) -> torch.Tensor:
 
         x, y = torch.as_tensor(x), torch.as_tensor(y)
-        if not diag:
+        if pairwise:
             dist = distance.squared_pairwise_distance(x.flatten(1), y.flatten(1))  # [Nx, Ny]
         else:
             dist = distance.squared_distance(x.flatten(1), y.flatten(1))  # [N, 1]
@@ -82,11 +82,10 @@ def forward(self, x: Union[np.ndarray, torch.Tensor],
         if infer_sigma or self.init_required:
             if self.trainable and infer_sigma:
                 raise ValueError("Gradients cannot be computed w.r.t. an inferred sigma value")
-            if not diag:
-                dist_hat = dist
+            if pairwise:
+                sigma = self.init_sigma_fn(x, y, dist)
             else:
-                dist_hat = distance.squared_pairwise_distance(x.flatten(1), y.flatten(1))
-            sigma = self.init_sigma_fn(x, y, dist_hat)
+                sigma = (.5 * dist.flatten().sort().values[dist.shape[0] // 2 - 1].unsqueeze(dim=-1)) ** .5
             with torch.no_grad():
                 self.log_sigma.copy_(sigma.log().clone())
             self.init_required = False
diff --git a/alibi_detect/utils/tensorflow/distance.py b/alibi_detect/utils/tensorflow/distance.py
index 306ef1bbe..82cac2c50 100644
--- a/alibi_detect/utils/tensorflow/distance.py
+++ b/alibi_detect/utils/tensorflow/distance.py
@@ -109,10 +109,11 @@ def batch_compute_kernel_matrix(
     return k_mat
 
 
-def linear_mmd2(x: tf.Tensor,
-                y: tf.Tensor,
-                kernel: Callable,
-                permute: bool = False) -> Tuple[tf.Tensor, tf.Tensor]:
+def linear_mmd2(
+    x: tf.Tensor,
+    y: tf.Tensor,
+    kernel: Callable
+) -> Tuple[tf.Tensor, tf.Tensor]:
     """
     Compute maximum mean discrepancy (MMD^2) between 2 samples x and y with the
     linear-time estimator.
@@ -132,30 +133,22 @@ def linear_mmd2(x: tf.Tensor,
     m = np.shape(y)[0]
     if n != m:
         raise RuntimeError("Linear-time estimator requires equal size samples")
-    if not permute:
-        k_xx = kernel(x=x[0::2, :], y=x[1::2, :], diag=True)
-        k_yy = kernel(x=y[0::2, :], y=y[1::2, :], diag=True)
-        k_xy = kernel(x=x[0::2, :], y=y[1::2, :], diag=True)
-        k_yz = kernel(x=y[0::2, :], y=x[1::2, :], diag=True)
-    else:
-        idx = np.random.permutation(m + n)
-        xy = tf.gather(tf.concat([x, y], axis=0), idx)
-        x_hat, y_hat = xy[:n, :], xy[n:, :]
-        k_xx = kernel(x_hat[0::2, :], x_hat[1::2, :], diag=True)
-        k_yy = kernel(y_hat[0::2, :], y_hat[1::2, :], diag=True)
-        k_xy = kernel(x_hat[0::2, :], y_hat[1::2, :], diag=True)
-        k_yz = kernel(y_hat[0::2, :], x_hat[1::2, :], diag=True)
-
+    k_xx = kernel(x=x[0::2, :], y=x[1::2, :], pairwise=False)
+    k_yy = kernel(x=y[0::2, :], y=y[1::2, :], pairwise=False)
+    k_xy = kernel(x=x[0::2, :], y=y[1::2, :], pairwise=False)
+    k_yz = kernel(x=y[0::2, :], y=x[1::2, :], pairwise=False)
     h = k_xx + k_yy - k_xy - k_yz
-    mmd2 = (tf.reduce_sum(h) / (n / 2.))
-    var_mmd2 = (tf.reduce_sum(h ** 2) / (n / 2.)) - (mmd2 ** 2)
+    mmd2 = tf.reduce_mean(h)
+    var_mmd2 = tf.math.reduce_sum(h ** 2) / ((n / 2.) - 1) - (mmd2 ** 2)
     return mmd2, var_mmd2
 
 
-def mmd2_from_kernel_matrix(kernel_mat: tf.Tensor,
-                            m: int,
-                            permute: bool = False,
-                            zero_diag: bool = True) -> tf.Tensor:
+def mmd2_from_kernel_matrix(
+    kernel_mat: tf.Tensor,
+    m: int,
+    permute: bool = False,
+    zero_diag: bool = True
+) -> tf.Tensor:
     """
     Compute maximum mean discrepancy (MMD^2) between 2 samples x and y from the
     full kernel matrix between the samples.
diff --git a/alibi_detect/utils/tensorflow/kernels.py b/alibi_detect/utils/tensorflow/kernels.py
index 0b2cbc977..787e6a502 100644
--- a/alibi_detect/utils/tensorflow/kernels.py
+++ b/alibi_detect/utils/tensorflow/kernels.py
@@ -71,10 +71,10 @@ def sigma(self) -> tf.Tensor:
 
     def call(self, x: tf.Tensor, y: tf.Tensor,
              infer_sigma: bool = False,
-             diag: bool = False) -> tf.Tensor:
+             pairwise: bool = True) -> tf.Tensor:
         y = tf.cast(y, x.dtype)
         x, y = tf.reshape(x, (x.shape[0], -1)), tf.reshape(y, (y.shape[0], -1))  # flatten
-        if not diag:
+        if pairwise:
             dist = distance.squared_pairwise_distance(x, y)  # [Nx, Ny]
         else:
             dist = distance.squared_distance(x, y)  # [Nx]
@@ -82,11 +82,10 @@ def call(self, x: tf.Tensor, y: tf.Tensor,
         if infer_sigma or self.init_required:
             if self.trainable and infer_sigma:
                 raise ValueError("Gradients cannot be computed w.r.t. an inferred sigma value")
-            if not diag:
-                dist_hat = dist
+            if pairwise:
+                sigma = self.init_sigma_fn(x, y, dist)
             else:
-                dist_hat = distance.squared_pairwise_distance(x, y)
-            sigma = self.init_sigma_fn(x, y, dist_hat)
+                sigma = tf.expand_dims((.5 * tf.sort(tf.reshape(dist, (-1,)))[dist.shape[0] // 2 - 1]) ** .5, axis=0)
             self.log_sigma.assign(tf.math.log(sigma))
             self.init_required = False
 

From 678eae0a9c00c117c3be83cb0ce1ccc0422767a0 Mon Sep 17 00:00:00 2001
From: Hao Song <nuacesh@gmail.com>
Date: Mon, 25 Apr 2022 20:59:16 +0100
Subject: [PATCH 10/14] Doc string fixes following review comments.

---
 alibi_detect/cd/mmd.py            | 3 ++-
 alibi_detect/cd/pytorch/mmd.py    | 4 ++--
 alibi_detect/cd/tensorflow/mmd.py | 4 ++--
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/alibi_detect/cd/mmd.py b/alibi_detect/cd/mmd.py
index e376c1597..73db0b549 100644
--- a/alibi_detect/cd/mmd.py
+++ b/alibi_detect/cd/mmd.py
@@ -146,6 +146,7 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray]:
         Returns
         -------
         p-value obtained from the permutation test, the MMD^2 between the reference and test set
-        and the MMD^2 values from the permutation test.
+        and the MMD^2 values from the qudratic permutation test, or the threshold for the given
+        significance level for the linear time test.
         """
         return self._detector.score(x)
diff --git a/alibi_detect/cd/pytorch/mmd.py b/alibi_detect/cd/pytorch/mmd.py
index 024ba41c9..738940a09 100644
--- a/alibi_detect/cd/pytorch/mmd.py
+++ b/alibi_detect/cd/pytorch/mmd.py
@@ -241,8 +241,8 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray]:
 
         Returns
         -------
-        p-value obtained from the permutation test, the MMD^2 between the reference and test set
-        and the MMD^2 values from the permutation test.
+        p-value obtained from the null hypothesis, the MMD^2 between the reference and test set
+        and the MMD^2 threshold for the given significance level.
         """
         x_ref, x = self.preprocess(x)
         n = x.shape[0]
diff --git a/alibi_detect/cd/tensorflow/mmd.py b/alibi_detect/cd/tensorflow/mmd.py
index cfdaba024..eb136b583 100644
--- a/alibi_detect/cd/tensorflow/mmd.py
+++ b/alibi_detect/cd/tensorflow/mmd.py
@@ -208,8 +208,8 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray]:
 
         Returns
         -------
-        p-value obtained from the permutation test, the MMD^2 between the reference and test set
-        and the MMD^2 values from the permutation test.
+        p-value obtained from the null hypothesis, the MMD^2 between the reference and test set
+        and the MMD^2 threshold for the given significance level.
         """
         x_ref, x = self.preprocess(x)
         # compute kernel matrix, MMD^2 and apply permutation test using the kernel matrix

From 7952ab34e9f9f2692bdf0125ed7ae8f926755e25 Mon Sep 17 00:00:00 2001
From: Hao Song <nuacesh@gmail.com>
Date: Sun, 8 May 2022 23:46:43 +0100
Subject: [PATCH 11/14] Added tests and doc for the new estimator, fixes
 following review comments.

---
 alibi_detect/cd/mmd.py                        |  11 +-
 alibi_detect/cd/pytorch/mmd.py                |  32 +++---
 .../pytorch/tests/test_linear_time_mmd_pt.py  |  96 ++++++++++++++++
 alibi_detect/cd/tensorflow/mmd.py             |  27 +++--
 .../tests/test_linear_time_mmd_tf.py          | 107 ++++++++++++++++++
 alibi_detect/utils/pytorch/distance.py        |  25 ++--
 alibi_detect/utils/pytorch/kernels.py         |  46 ++++----
 alibi_detect/utils/tensorflow/distance.py     |  29 +++--
 alibi_detect/utils/tensorflow/kernels.py      |  40 ++++---
 doc/source/cd/methods/mmddrift.ipynb          |   2 +
 10 files changed, 330 insertions(+), 85 deletions(-)
 create mode 100644 alibi_detect/cd/pytorch/tests/test_linear_time_mmd_pt.py
 create mode 100644 alibi_detect/cd/tensorflow/tests/test_linear_time_mmd_tf.py

diff --git a/alibi_detect/cd/mmd.py b/alibi_detect/cd/mmd.py
index 73db0b549..3b8751b43 100644
--- a/alibi_detect/cd/mmd.py
+++ b/alibi_detect/cd/mmd.py
@@ -4,7 +4,7 @@
 from alibi_detect.utils.frameworks import has_pytorch, has_tensorflow
 
 if has_pytorch:
-    from alibi_detect.cd.pytorch.mmd import MMDDriftTorch, LinearTimeDriftTorch
+    from alibi_detect.cd.pytorch.mmd import MMDDriftTorch, LinearTimeMMDDriftTorch
 
 if has_tensorflow:
     from alibi_detect.cd.tensorflow.mmd import MMDDriftTF, LinearTimeMMDDriftTF
@@ -44,8 +44,8 @@ def __init__(
         estimator
             Estimator used for the MMD^2 computation {'quad', 'linear'}. 'Quad' is the default and
             uses the quadratic u-statistics on each square kernel matrix. 'Linear' uses the linear
-            time estimator as in Gretton et al. (2014), and the threshold is computed using the Gaussian
-            asympotic distribution under null.
+            time estimator as in Gretton et al. (JMLR 2014, sec 6), and the threshold is computed
+            using the Gaussian asympotic distribution under null.
         preprocess_x_ref
             Whether to already preprocess and store the reference data.
         update_x_ref
@@ -62,7 +62,8 @@ def __init__(
         configure_kernel_from_x_ref
             Whether to already configure the kernel bandwidth from the reference data.
         n_permutations
-            Number of permutations used in the permutation test.
+            Number of permutations used in the permutation test, only used for the quadratic estimator
+            (estimator='quad').
         device
             Device type used. The default None tries to use the GPU and falls back on CPU if needed.
             Can be specified by passing either 'cuda', 'gpu' or 'cpu'. Only relevant for 'pytorch' backend.
@@ -106,7 +107,7 @@ def __init__(
                 self._detector = MMDDriftTorch(*args, **kwargs)  # type: ignore
             elif estimator == 'linear':
                 kwargs.pop('n_permutations', None)
-                self._detector = LinearTimeDriftTorch(*args, **kwargs)  # type: ignore
+                self._detector = LinearTimeMMDDriftTorch(*args, **kwargs)  # type: ignore
             else:
                 raise NotImplementedError(f'{estimator} not implemented. Use quad or linear instead.')
         self.meta = self._detector.meta
diff --git a/alibi_detect/cd/pytorch/mmd.py b/alibi_detect/cd/pytorch/mmd.py
index 738940a09..635f4342a 100644
--- a/alibi_detect/cd/pytorch/mmd.py
+++ b/alibi_detect/cd/pytorch/mmd.py
@@ -136,7 +136,7 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray]:
         return p_val.numpy().item(), mmd2.numpy().item(), mmd2_permuted.numpy()
 
 
-class LinearTimeDriftTorch(BaseMMDDrift):
+class LinearTimeMMDDriftTorch(BaseMMDDrift):
     def __init__(
             self,
             x_ref: Union[np.ndarray, list],
@@ -147,13 +147,12 @@ def __init__(
             kernel: Callable = GaussianRBF,
             sigma: Optional[np.ndarray] = None,
             configure_kernel_from_x_ref: bool = True,
-            n_permutations: int = 100,
             device: Optional[str] = None,
             input_shape: Optional[tuple] = None,
             data_type: Optional[str] = None
     ) -> None:
         """
-        Maximum Mean Discrepancy (MMD) data drift detector using a permutation test, with linear-time estimator.
+        Maximum Mean Discrepancy (MMD) data drift detector using a linear-time estimator.
 
         Parameters
         ----------
@@ -176,8 +175,6 @@ def __init__(
             The kernel evaluation is then averaged over those bandwidths.
         configure_kernel_from_x_ref
             Whether to already configure the kernel bandwidth from the reference data.
-        n_permutations
-            Number of permutations used in the permutation test.
         device
             Device type used. The default None tries to use the GPU and falls back on CPU if needed.
             Can be specified by passing either 'cuda', 'gpu' or 'cpu'.
@@ -194,7 +191,6 @@ def __init__(
             preprocess_fn=preprocess_fn,
             sigma=sigma,
             configure_kernel_from_x_ref=configure_kernel_from_x_ref,
-            n_permutations=n_permutations,
             input_shape=input_shape,
             data_type=data_type
         )
@@ -215,8 +211,11 @@ def __init__(
 
         # compute kernel matrix for the reference data
         if self.infer_sigma or isinstance(sigma, torch.Tensor):
-            x = torch.from_numpy(self.x_ref).to(self.device)
-            self.k_xx = self.kernel(x, x, infer_sigma=self.infer_sigma)
+            n = self.x_ref.shape[0]
+            n_hat = int(np.floor(n / 2) * 2)
+            x = torch.from_numpy(self.x_ref[:n_hat, :]).to(self.device)
+            self.k_xx = self.kernel(x=x[0::2, :], y=x[1::2, :],
+                                    pairwise=False, infer_sigma=self.infer_sigma)
             self.infer_sigma = False
         else:
             self.k_xx, self.infer_sigma = None, True
@@ -231,8 +230,9 @@ def kernel_matrix(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
 
     def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray]:
         """
-        Compute the p-value resulting from a permutation test using the maximum mean discrepancy
-        as a distance measure between the reference data and the data to be tested.
+        Compute the p-value using the maximum mean discrepancy as a distance measure between the
+        reference data and the data to be tested. x and x_ref are required to have the same size.
+        The sample size is then specified as the maximal even number below the data size.
 
         Parameters
         ----------
@@ -247,14 +247,20 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray]:
         x_ref, x = self.preprocess(x)
         n = x.shape[0]
         m = x_ref.shape[0]
-        n_hat = int(np.floor(min(n, m) / 2) * 2)
+        if n != m:
+            raise ValueError('x and x_ref must have the same size.')
+        n_hat = int(np.floor(n / 2) * 2)
         x_ref = torch.from_numpy(x_ref[:n_hat, :]).to(self.device)  # type: ignore[assignment]
         x = torch.from_numpy(x[:n_hat, :]).to(self.device)  # type: ignore[assignment]
-        mmd2, var_mmd2 = linear_mmd2(x_ref, x, self.kernel)  # type: ignore[arg-type]
+        if self.k_xx is not None and self.update_x_ref is None:
+            k_xx = self.k_xx
+        else:
+            k_xx = self.kernel(x=x_ref[0::2, :], y=x_ref[1::2, :], pairwise=False)
+        mmd2, var_mmd2 = linear_mmd2(k_xx, x_ref, x, self.kernel)  # type: ignore[arg-type]
         if self.device.type == 'cuda':
             mmd2 = mmd2.cpu()
         mmd2 = mmd2.numpy().item()
-        var_mmd2 = var_mmd2.numpy().item()
+        var_mmd2 = np.clip(var_mmd2.numpy().item(), 1e-8, 1e8)
         std_mmd2 = np.sqrt(var_mmd2)
         t = mmd2 / (std_mmd2 / np.sqrt(n_hat / 2.))
         p_val = 1 - stats.t.cdf(t, df=(n_hat / 2.) - 1)
diff --git a/alibi_detect/cd/pytorch/tests/test_linear_time_mmd_pt.py b/alibi_detect/cd/pytorch/tests/test_linear_time_mmd_pt.py
new file mode 100644
index 000000000..b24514200
--- /dev/null
+++ b/alibi_detect/cd/pytorch/tests/test_linear_time_mmd_pt.py
@@ -0,0 +1,96 @@
+from functools import partial
+from itertools import product
+import numpy as np
+import pytest
+import torch
+import torch.nn as nn
+from typing import Callable, List
+from alibi_detect.cd.pytorch.mmd import LinearTimeMMDDriftTorch
+from alibi_detect.cd.pytorch.preprocess import HiddenOutput, preprocess_drift
+
+n, n_hidden, n_classes = 500, 10, 5
+
+
+class MyModel(nn.Module):
+    def __init__(self, n_features: int):
+        super().__init__()
+        self.dense1 = nn.Linear(n_features, 20)
+        self.dense2 = nn.Linear(20, 2)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = nn.ReLU()(self.dense1(x))
+        return self.dense2(x)
+
+
+# test List[Any] inputs to the detector
+def preprocess_list(x: List[np.ndarray]) -> np.ndarray:
+    return np.concatenate(x, axis=0)
+
+
+n_features = [10]
+n_enc = [None, 3]
+preprocess = [
+    (None, None),
+    (preprocess_drift, {'model': HiddenOutput, 'layer': -1}),
+    (preprocess_list, None)
+]
+update_x_ref = [{'last': 500}, {'reservoir_sampling': 500}, None]
+preprocess_x_ref = [True, False]
+tests_mmddrift = list(product(n_features, n_enc, preprocess,
+                              update_x_ref, preprocess_x_ref))
+n_tests = len(tests_mmddrift)
+
+
+@pytest.fixture
+def mmd_params(request):
+    return tests_mmddrift[request.param]
+
+
+@pytest.mark.parametrize('mmd_params', list(range(n_tests)), indirect=True)
+def test_mmd(mmd_params):
+    n_features, n_enc, preprocess, update_x_ref, preprocess_x_ref = mmd_params
+
+    np.random.seed(0)
+    torch.manual_seed(0)
+
+    x_ref = np.random.randn(n * n_features).reshape(n, n_features).astype(np.float32)
+    preprocess_fn, preprocess_kwargs = preprocess
+    to_list = False
+    if hasattr(preprocess_fn, '__name__') and preprocess_fn.__name__ == 'preprocess_list':
+        if not preprocess_x_ref:
+            return
+        to_list = True
+        x_ref = [_[None, :] for _ in x_ref]
+    elif isinstance(preprocess_fn, Callable) and 'layer' in list(preprocess_kwargs.keys()) \
+            and preprocess_kwargs['model'].__name__ == 'HiddenOutput':
+        model = MyModel(n_features)
+        layer = preprocess_kwargs['layer']
+        preprocess_fn = partial(preprocess_fn, model=HiddenOutput(model=model, layer=layer))
+    else:
+        preprocess_fn = None
+
+    cd = LinearTimeMMDDriftTorch(
+        x_ref=x_ref,
+        p_val=.05,
+        preprocess_x_ref=preprocess_x_ref if isinstance(preprocess_fn, Callable) else False,
+        update_x_ref=update_x_ref,
+        preprocess_fn=preprocess_fn
+    )
+    x = x_ref.copy()
+    preds = cd.predict(x, return_p_val=True)
+    assert preds['data']['is_drift'] == 0 and preds['data']['p_val'] >= cd.p_val
+    if isinstance(update_x_ref, dict):
+        k = list(update_x_ref.keys())[0]
+        assert cd.n == len(x) + len(x_ref)
+        assert cd.x_ref.shape[0] == min(update_x_ref[k], len(x) + len(x_ref))
+
+    x_h1 = np.random.randn(n * n_features).reshape(n, n_features).astype(np.float32)
+    if to_list:
+        x_h1 = [_[None, :] for _ in x_h1]
+    preds = cd.predict(x_h1, return_p_val=True)
+    if preds['data']['is_drift'] == 1:
+        assert preds['data']['p_val'] < preds['data']['threshold'] == cd.p_val
+        assert preds['data']['distance'] > preds['data']['distance_threshold']
+    else:
+        assert preds['data']['p_val'] >= preds['data']['threshold'] == cd.p_val
+        assert preds['data']['distance'] <= preds['data']['distance_threshold']
diff --git a/alibi_detect/cd/tensorflow/mmd.py b/alibi_detect/cd/tensorflow/mmd.py
index eb136b583..8c4aefd13 100644
--- a/alibi_detect/cd/tensorflow/mmd.py
+++ b/alibi_detect/cd/tensorflow/mmd.py
@@ -133,7 +133,7 @@ def __init__(
             data_type: Optional[str] = None
     ) -> None:
         """
-        Maximum Mean Discrepancy (MMD) data drift detector using a permutation test, with linear-time estimator.
+        Maximum Mean Discrepancy (MMD) data drift detector using a linear-time estimator.
 
         Parameters
         ----------
@@ -156,8 +156,6 @@ def __init__(
             The kernel evaluation is then averaged over those bandwidths.
         configure_kernel_from_x_ref
             Whether to already configure the kernel bandwidth from the reference data.
-        n_permutations
-            Number of permutations used in the permutation test.
         input_shape
             Shape of input data.
         data_type
@@ -183,7 +181,11 @@ def __init__(
 
         # compute kernel matrix for the reference data
         if self.infer_sigma or isinstance(sigma, tf.Tensor):
-            self.k_xx = self.kernel(self.x_ref, self.x_ref, infer_sigma=self.infer_sigma)
+            n = self.x_ref.shape[0]
+            n_hat = int(np.floor(n / 2) * 2)
+            x = self.x_ref[:n_hat, :]
+            self.k_xx = self.kernel(x=x[0::2, :], y=x[1::2, :],
+                                    pairwise=False, infer_sigma=self.infer_sigma)
             self.infer_sigma = False
         else:
             self.k_xx, self.infer_sigma = None, True
@@ -198,8 +200,9 @@ def kernel_matrix(self, x: Union[np.ndarray, tf.Tensor], y: Union[np.ndarray, tf
 
     def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray]:
         """
-        Compute the p-value resulting from a permutation test using the maximum mean discrepancy
-        as a distance measure between the reference data and the data to be tested.
+        Compute the p-value using the maximum mean discrepancy as a distance measure between the
+        reference data and the data to be tested. The sample size is specified as the maximal even
+        number in the smaller dataset between x and x_ref.
 
         Parameters
         ----------
@@ -215,12 +218,18 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, np.ndarray]:
         # compute kernel matrix, MMD^2 and apply permutation test using the kernel matrix
         n = x.shape[0]
         m = x_ref.shape[0]
-        n_hat = int(np.floor(min(n, m) / 2) * 2)
+        if n != m:
+            raise ValueError('x and x_ref must have the same size.')
+        n_hat = int(np.floor(n / 2) * 2)
         x_ref = x_ref[:n_hat, :]
         x = x[:n_hat, :]
-        mmd2, var_mmd2 = linear_mmd2(x_ref, x, self.kernel)
+        if self.k_xx is not None and self.update_x_ref is None:
+            k_xx = self.k_xx
+        else:
+            k_xx = self.kernel(x=x_ref[0::2, :], y=x_ref[1::2, :], pairwise=False)
+        mmd2, var_mmd2 = linear_mmd2(k_xx, x_ref, x, self.kernel)
         mmd2 = mmd2.numpy()
-        var_mmd2 = var_mmd2.numpy()
+        var_mmd2 = np.clip(var_mmd2.numpy(), 1e-8, 1e8)
         std_mmd2 = np.sqrt(var_mmd2)
         t = mmd2 / (std_mmd2 / np.sqrt(n_hat / 2.))
         p_val = 1 - stats.t.cdf(t, df=(n_hat / 2.) - 1)
diff --git a/alibi_detect/cd/tensorflow/tests/test_linear_time_mmd_tf.py b/alibi_detect/cd/tensorflow/tests/test_linear_time_mmd_tf.py
new file mode 100644
index 000000000..e1751e85c
--- /dev/null
+++ b/alibi_detect/cd/tensorflow/tests/test_linear_time_mmd_tf.py
@@ -0,0 +1,107 @@
+from functools import partial
+from itertools import product
+import numpy as np
+import pytest
+import tensorflow as tf
+from tensorflow.keras.layers import Dense, Input, InputLayer
+from typing import Callable, List
+from alibi_detect.cd.tensorflow.mmd import LinearTimeMMDDriftTF
+from alibi_detect.cd.tensorflow.preprocess import HiddenOutput, UAE, preprocess_drift
+
+n, n_hidden, n_classes = 500, 10, 5
+
+tf.random.set_seed(0)
+
+
+def mymodel(shape):
+    x_in = Input(shape=shape)
+    x = Dense(n_hidden)(x_in)
+    x_out = Dense(n_classes, activation='softmax')(x)
+    return tf.keras.models.Model(inputs=x_in, outputs=x_out)
+
+
+# test List[Any] inputs to the detector
+def preprocess_list(x: List[np.ndarray]) -> np.ndarray:
+    return np.concatenate(x, axis=0)
+
+
+n_features = [10]
+n_enc = [None, 3]
+preprocess = [
+    (None, None),
+    (preprocess_drift, {'model': HiddenOutput, 'layer': -1}),
+    (preprocess_drift, {'model': UAE}),
+    (preprocess_list, None)
+]
+update_x_ref = [{'last': 500}, {'reservoir_sampling': 500}, None]
+preprocess_x_ref = [True, False]
+tests_mmddrift = list(product(n_features, n_enc, preprocess,
+                              update_x_ref, preprocess_x_ref))
+n_tests = len(tests_mmddrift)
+
+
+@pytest.fixture
+def mmd_params(request):
+    return tests_mmddrift[request.param]
+
+
+@pytest.mark.parametrize('mmd_params', list(range(n_tests)), indirect=True)
+def test_mmd(mmd_params):
+    n_features, n_enc, preprocess, update_x_ref, preprocess_x_ref = mmd_params
+
+    np.random.seed(0)
+
+    x_ref = np.random.randn(n * n_features).reshape(n, n_features).astype(np.float32)
+    preprocess_fn, preprocess_kwargs = preprocess
+    to_list = False
+    if hasattr(preprocess_fn, '__name__') and preprocess_fn.__name__ == 'preprocess_list':
+        if not preprocess_x_ref:
+            return
+        to_list = True
+        x_ref = [_[None, :] for _ in x_ref]
+    elif isinstance(preprocess_fn, Callable):
+        if 'layer' in list(preprocess_kwargs.keys()) \
+                and preprocess_kwargs['model'].__name__ == 'HiddenOutput':
+            model = mymodel((n_features,))
+            layer = preprocess_kwargs['layer']
+            preprocess_fn = partial(preprocess_fn, model=HiddenOutput(model=model, layer=layer))
+        elif preprocess_kwargs['model'].__name__ == 'UAE' \
+                and n_features > 1 and isinstance(n_enc, int):
+            tf.random.set_seed(0)
+            encoder_net = tf.keras.Sequential(
+                [
+                    InputLayer(input_shape=(n_features,)),
+                    Dense(n_enc)
+                ]
+            )
+            preprocess_fn = partial(preprocess_fn, model=UAE(encoder_net=encoder_net))
+        else:
+            preprocess_fn = None
+    else:
+        preprocess_fn = None
+
+    cd = LinearTimeMMDDriftTF(
+        x_ref=x_ref,
+        p_val=.05,
+        preprocess_x_ref=preprocess_x_ref if isinstance(preprocess_fn, Callable) else False,
+        update_x_ref=update_x_ref,
+        preprocess_fn=preprocess_fn
+    )
+    x = x_ref.copy()
+    preds = cd.predict(x, return_p_val=True)
+    assert preds['data']['is_drift'] == 0 and preds['data']['p_val'] >= cd.p_val
+    if isinstance(update_x_ref, dict):
+        k = list(update_x_ref.keys())[0]
+        assert cd.n == len(x) + len(x_ref)
+        assert cd.x_ref.shape[0] == min(update_x_ref[k], len(x) + len(x_ref))
+
+    x_h1 = np.random.randn(n * n_features).reshape(n, n_features).astype(np.float32)
+    if to_list:
+        x_h1 = [_[None, :] for _ in x_h1]
+    preds = cd.predict(x_h1, return_p_val=True)
+    if preds['data']['is_drift'] == 1:
+        assert preds['data']['p_val'] < preds['data']['threshold'] == cd.p_val
+        assert preds['data']['distance'] > preds['data']['distance_threshold']
+    else:
+        assert preds['data']['p_val'] >= preds['data']['threshold'] == cd.p_val
+        assert preds['data']['distance'] <= preds['data']['distance_threshold']
diff --git a/alibi_detect/utils/pytorch/distance.py b/alibi_detect/utils/pytorch/distance.py
index e26f6f380..714598b81 100644
--- a/alibi_detect/utils/pytorch/distance.py
+++ b/alibi_detect/utils/pytorch/distance.py
@@ -8,7 +8,12 @@
 
 
 @torch.jit.script
-def squared_pairwise_distance(x: torch.Tensor, y: torch.Tensor, a_min: float = 1e-30) -> torch.Tensor:
+def squared_pairwise_distance(
+    x: torch.Tensor,
+    y: torch.Tensor,
+    a_min: float = 1e-30,
+    pairwise: bool = False
+) -> torch.Tensor:
     """
     PyTorch pairwise squared Euclidean distance between samples x and y.
 
@@ -20,13 +25,18 @@ def squared_pairwise_distance(x: torch.Tensor, y: torch.Tensor, a_min: float = 1
         Batch of instances of shape [Ny, features].
     a_min
         Lower bound to clip distance values.
+    pairwise
+        Whether to compute pairwise distances or not.
     Returns
     -------
     Pairwise squared Euclidean distance [Nx, Ny].
     """
     x2 = x.pow(2).sum(dim=-1, keepdim=True)
     y2 = y.pow(2).sum(dim=-1, keepdim=True)
-    dist = torch.addmm(y2.transpose(-2, -1), x, y.transpose(-2, -1), alpha=-2).add_(x2)
+    if pairwise:
+        dist = torch.addmm(y2.transpose(-2, -1), x, y.transpose(-2, -1), alpha=-2).add_(x2)
+    else:
+        dist = x2 + y2 - (2 * x * y).sum(dim=-1, keepdim=True)
     return dist.clamp_min_(a_min)
 
 
@@ -116,6 +126,7 @@ def batch_compute_kernel_matrix(
 
 
 def linear_mmd2(
+    k_xx: torch.Tensor,
     x: torch.Tensor,
     y: torch.Tensor,
     kernel: Callable
@@ -136,16 +147,10 @@ def linear_mmd2(
     -------
     MMD^2 between the samples.
     """
-    n = np.shape(x)[0]
-    m = np.shape(y)[0]
-    if n != m:
-        raise RuntimeError("Linear-time estimator requires equal size samples")
-    k_xx = kernel(x=x[0::2, :], y=x[1::2, :], pairwise=False)
     k_yy = kernel(x=y[0::2, :], y=y[1::2, :], pairwise=False)
     k_xy = kernel(x=x[0::2, :], y=y[1::2, :], pairwise=False)
-    k_yz = kernel(x=y[0::2, :], y=x[1::2, :], pairwise=False)
-
-    h = k_xx + k_yy - k_xy - k_yz
+    k_yx = kernel(x=y[0::2, :], y=x[1::2, :], pairwise=False)
+    h = k_xx + k_yy - k_xy - k_yx
     mmd2 = h.mean()
     var_mmd2 = torch.var(h, unbiased=True)
     return mmd2, var_mmd2
diff --git a/alibi_detect/utils/pytorch/kernels.py b/alibi_detect/utils/pytorch/kernels.py
index 0e4390976..15f2ad700 100644
--- a/alibi_detect/utils/pytorch/kernels.py
+++ b/alibi_detect/utils/pytorch/kernels.py
@@ -5,7 +5,12 @@
 from typing import Optional, Union, Callable
 
 
-def sigma_median(x: torch.Tensor, y: torch.Tensor, dist: torch.Tensor) -> torch.Tensor:
+def sigma_median(
+    x: torch.Tensor,
+    y: torch.Tensor,
+    dist: torch.Tensor,
+    pairwise: bool
+) -> torch.Tensor:
     """
     Bandwidth estimation using the median heuristic :cite:t:`Gretton2012`.
 
@@ -16,16 +21,21 @@ def sigma_median(x: torch.Tensor, y: torch.Tensor, dist: torch.Tensor) -> torch.
     y
         Tensor of instances with dimension [Ny, features].
     dist
-        Tensor with dimensions [Nx, Ny], containing the pairwise distances between `x` and `y`.
-
+        Tensor with dimensions [Nx, Ny] when pairwise=True, containing the pairwise distances between `x` and `y`.
+        Dimensions are [Nx, 1] when pairwise=False.
+    pairwise
+        Whether the distances are pairwise.
     Returns
     -------
     The computed bandwidth, `sigma`.
     """
-    n = min(x.shape[0], y.shape[0])
-    n = n if (x[:n] == y[:n]).all() and x.shape == y.shape else 0
-    n_median = n + (np.prod(dist.shape) - n) // 2 - 1
-    sigma = (.5 * dist.flatten().sort().values[n_median].unsqueeze(dim=-1)) ** .5
+    if pairwise:
+        n = min(x.shape[0], y.shape[0])
+        n = n if (x[:n] == y[:n]).all() and x.shape == y.shape else 0
+        n_median = n + (np.prod(dist.shape) - n) // 2 - 1
+        sigma = (.5 * dist.flatten().sort().values[n_median].unsqueeze(dim=-1)) ** .5
+    else:
+        sigma = (.5 * dist.flatten().sort().values[dist.shape[0] // 2 - 1].unsqueeze(dim=-1)) ** .5
     return sigma
 
 
@@ -68,24 +78,22 @@ def __init__(
     def sigma(self) -> torch.Tensor:
         return self.log_sigma.exp()
 
-    def forward(self, x: Union[np.ndarray, torch.Tensor],
-                y: Union[np.ndarray, torch.Tensor],
-                infer_sigma: bool = False,
-                pairwise: bool = True) -> torch.Tensor:
+    def forward(
+        self, x: Union[np.ndarray, torch.Tensor],
+        y: Union[np.ndarray, torch.Tensor],
+        infer_sigma: bool = False,
+        pairwise: bool = True
+    ) -> torch.Tensor:
 
         x, y = torch.as_tensor(x), torch.as_tensor(y)
-        if pairwise:
-            dist = distance.squared_pairwise_distance(x.flatten(1), y.flatten(1))  # [Nx, Ny]
-        else:
-            dist = distance.squared_distance(x.flatten(1), y.flatten(1))  # [N, 1]
+        dist = distance.squared_pairwise_distance(x=x.flatten(1),
+                                                  y=y.flatten(1),
+                                                  pairwise=pairwise)  # [Nx, Ny] or [Nx, 1]
 
         if infer_sigma or self.init_required:
             if self.trainable and infer_sigma:
                 raise ValueError("Gradients cannot be computed w.r.t. an inferred sigma value")
-            if pairwise:
-                sigma = self.init_sigma_fn(x, y, dist)
-            else:
-                sigma = (.5 * dist.flatten().sort().values[dist.shape[0] // 2 - 1].unsqueeze(dim=-1)) ** .5
+            sigma = self.init_sigma_fn(x, y, dist, pairwise)
             with torch.no_grad():
                 self.log_sigma.copy_(sigma.log().clone())
             self.init_required = False
diff --git a/alibi_detect/utils/tensorflow/distance.py b/alibi_detect/utils/tensorflow/distance.py
index 82cac2c50..18e473a9f 100644
--- a/alibi_detect/utils/tensorflow/distance.py
+++ b/alibi_detect/utils/tensorflow/distance.py
@@ -6,7 +6,13 @@
 logger = logging.getLogger(__name__)
 
 
-def squared_pairwise_distance(x: tf.Tensor, y: tf.Tensor, a_min: float = 1e-30, a_max: float = 1e30) -> tf.Tensor:
+def squared_pairwise_distance(
+    x: tf.Tensor,
+    y: tf.Tensor,
+    a_min: float = 1e-30,
+    a_max: float = 1e30,
+    pairwise: bool = False
+) -> tf.Tensor:
     """
     TensorFlow pairwise squared Euclidean distance between samples x and y.
 
@@ -20,14 +26,18 @@ def squared_pairwise_distance(x: tf.Tensor, y: tf.Tensor, a_min: float = 1e-30,
         Lower bound to clip distance values.
     a_max
         Upper bound to clip distance values.
-
+    pairwise
+        Whether to compute pairwise distances or not.
     Returns
     -------
     Pairwise squared Euclidean distance [Nx, Ny].
     """
     x2 = tf.reduce_sum(x ** 2, axis=-1, keepdims=True)
     y2 = tf.reduce_sum(y ** 2, axis=-1, keepdims=True)
-    dist = x2 + tf.transpose(y2, (1, 0)) - 2. * x @ tf.transpose(y, (1, 0))
+    if pairwise:
+        dist = x2 + tf.transpose(y2, (1, 0)) - 2. * x @ tf.transpose(y, (1, 0))
+    else:
+        dist = x2 + y2 - 2. * tf.reduce_sum(x * y, axis=-1, keepdims=True)
     return tf.clip_by_value(dist, a_min, a_max)
 
 
@@ -110,6 +120,7 @@ def batch_compute_kernel_matrix(
 
 
 def linear_mmd2(
+    k_xx: tf.Tensor,
     x: tf.Tensor,
     y: tf.Tensor,
     kernel: Callable
@@ -126,18 +137,12 @@ def linear_mmd2(
         Batch of instances of shape [Ny, features].
     kernel
         Kernel function.
-    permute
-        Whether to permute the row indices. Used for permutation tests.
     """
-    n = np.shape(x)[0]
-    m = np.shape(y)[0]
-    if n != m:
-        raise RuntimeError("Linear-time estimator requires equal size samples")
-    k_xx = kernel(x=x[0::2, :], y=x[1::2, :], pairwise=False)
+    n = x.shape[0]
     k_yy = kernel(x=y[0::2, :], y=y[1::2, :], pairwise=False)
     k_xy = kernel(x=x[0::2, :], y=y[1::2, :], pairwise=False)
-    k_yz = kernel(x=y[0::2, :], y=x[1::2, :], pairwise=False)
-    h = k_xx + k_yy - k_xy - k_yz
+    k_yx = kernel(x=y[0::2, :], y=x[1::2, :], pairwise=False)
+    h = k_xx + k_yy - k_xy - k_yx
     mmd2 = tf.reduce_mean(h)
     var_mmd2 = tf.math.reduce_sum(h ** 2) / ((n / 2.) - 1) - (mmd2 ** 2)
     return mmd2, var_mmd2
diff --git a/alibi_detect/utils/tensorflow/kernels.py b/alibi_detect/utils/tensorflow/kernels.py
index 787e6a502..5d773ddf3 100644
--- a/alibi_detect/utils/tensorflow/kernels.py
+++ b/alibi_detect/utils/tensorflow/kernels.py
@@ -5,7 +5,12 @@
 from scipy.special import logit
 
 
-def sigma_median(x: tf.Tensor, y: tf.Tensor, dist: tf.Tensor) -> tf.Tensor:
+def sigma_median(
+    x: tf.Tensor,
+    y: tf.Tensor,
+    dist: tf.Tensor,
+    pairwise: bool
+) -> tf.Tensor:
     """
     Bandwidth estimation using the median heuristic :cite:t:`Gretton2012`.
 
@@ -17,15 +22,19 @@ def sigma_median(x: tf.Tensor, y: tf.Tensor, dist: tf.Tensor) -> tf.Tensor:
         Tensor of instances with dimension [Ny, features].
     dist
         Tensor with dimensions [Nx, Ny], containing the pairwise distances between `x` and `y`.
-
+    pairwise
+        Whether the distances are pairwise.
     Returns
     -------
     The computed bandwidth, `sigma`.
     """
-    n = min(x.shape[0], y.shape[0])
-    n = n if tf.reduce_all(x[:n] == y[:n]) and x.shape == y.shape else 0
-    n_median = n + (tf.math.reduce_prod(dist.shape) - n) // 2 - 1
-    sigma = tf.expand_dims((.5 * tf.sort(tf.reshape(dist, (-1,)))[n_median]) ** .5, axis=0)
+    if pairwise:
+        n = min(x.shape[0], y.shape[0])
+        n = n if tf.reduce_all(x[:n] == y[:n]) and x.shape == y.shape else 0
+        n_median = n + (tf.math.reduce_prod(dist.shape) - n) // 2 - 1
+        sigma = tf.expand_dims((.5 * tf.sort(tf.reshape(dist, (-1,)))[n_median]) ** .5, axis=0)
+    else:
+        sigma = tf.expand_dims((.5 * tf.sort(tf.reshape(dist, (-1,)))[dist.shape[0] // 2 - 1]) ** .5, axis=0)
     return sigma
 
 
@@ -69,23 +78,20 @@ def __init__(
     def sigma(self) -> tf.Tensor:
         return tf.math.exp(self.log_sigma)
 
-    def call(self, x: tf.Tensor, y: tf.Tensor,
-             infer_sigma: bool = False,
-             pairwise: bool = True) -> tf.Tensor:
+    def call(
+        self, x: tf.Tensor,
+        y: tf.Tensor,
+        infer_sigma: bool = False,
+        pairwise: bool = True
+    ) -> tf.Tensor:
         y = tf.cast(y, x.dtype)
         x, y = tf.reshape(x, (x.shape[0], -1)), tf.reshape(y, (y.shape[0], -1))  # flatten
-        if pairwise:
-            dist = distance.squared_pairwise_distance(x, y)  # [Nx, Ny]
-        else:
-            dist = distance.squared_distance(x, y)  # [Nx]
+        dist = distance.squared_pairwise_distance(x=x, y=y, pairwise=pairwise)  # [Nx, Ny] or [Nx, 1]
 
         if infer_sigma or self.init_required:
             if self.trainable and infer_sigma:
                 raise ValueError("Gradients cannot be computed w.r.t. an inferred sigma value")
-            if pairwise:
-                sigma = self.init_sigma_fn(x, y, dist)
-            else:
-                sigma = tf.expand_dims((.5 * tf.sort(tf.reshape(dist, (-1,)))[dist.shape[0] // 2 - 1]) ** .5, axis=0)
+            sigma = self.init_sigma_fn(x, y, dist, pairwise)
             self.log_sigma.assign(tf.math.log(sigma))
             self.init_required = False
 
diff --git a/doc/source/cd/methods/mmddrift.ipynb b/doc/source/cd/methods/mmddrift.ipynb
index 3f9c7e64e..f9a7211a7 100644
--- a/doc/source/cd/methods/mmddrift.ipynb
+++ b/doc/source/cd/methods/mmddrift.ipynb
@@ -48,6 +48,8 @@
     "\n",
     "* `p_val`: p-value used for significance of the permutation test.\n",
     "\n",
+    "* `estimator`: Estimator used for the MMD^2 computation {'quad', 'linear'}. 'Quad' is the default and uses the quadratic u-statistics on each square kernel matrix. 'Linear' uses the linear time estimator as in Gretton et al. (2014), and the threshold is computed using the Gaussian asympotic distribution under null.\n",
+    "\n",
     "* `preprocess_x_ref`: Whether to already apply the (optional) preprocessing step to the reference data at initialization and store the preprocessed data. Dependent on the preprocessing step, this can reduce the computation time for the predict step significantly, especially when the reference dataset is large. Defaults to *True*. It is possible that it needs to be set to *False* if the preprocessing step requires statistics from both the reference and test data, such as the mean or standard deviation.\n",
     "\n",
     "* `update_x_ref`: Reference data can optionally be updated to the last N instances seen by the detector or via [reservoir sampling](https://en.wikipedia.org/wiki/Reservoir_sampling) with size N. For the former, the parameter equals *{'last': N}* while for reservoir sampling *{'reservoir_sampling': N}* is passed.\n",

From 016f23f92f2e38a77c60988d5ec307652dbfac57 Mon Sep 17 00:00:00 2001
From: Hao Song <nuacesh@gmail.com>
Date: Mon, 9 May 2022 11:52:22 +0100
Subject: [PATCH 12/14] fixes on the distance function.

---
 alibi_detect/utils/pytorch/distance.py    |  2 +-
 alibi_detect/utils/pytorch/kernels.py     | 19 ++++++++-----------
 alibi_detect/utils/tensorflow/distance.py |  2 +-
 alibi_detect/utils/tensorflow/kernels.py  | 21 +++++++++------------
 4 files changed, 19 insertions(+), 25 deletions(-)

diff --git a/alibi_detect/utils/pytorch/distance.py b/alibi_detect/utils/pytorch/distance.py
index 714598b81..85a1ed710 100644
--- a/alibi_detect/utils/pytorch/distance.py
+++ b/alibi_detect/utils/pytorch/distance.py
@@ -12,7 +12,7 @@ def squared_pairwise_distance(
     x: torch.Tensor,
     y: torch.Tensor,
     a_min: float = 1e-30,
-    pairwise: bool = False
+    pairwise: bool = True
 ) -> torch.Tensor:
     """
     PyTorch pairwise squared Euclidean distance between samples x and y.
diff --git a/alibi_detect/utils/pytorch/kernels.py b/alibi_detect/utils/pytorch/kernels.py
index 15f2ad700..18ecc66df 100644
--- a/alibi_detect/utils/pytorch/kernels.py
+++ b/alibi_detect/utils/pytorch/kernels.py
@@ -9,7 +9,6 @@ def sigma_median(
     x: torch.Tensor,
     y: torch.Tensor,
     dist: torch.Tensor,
-    pairwise: bool
 ) -> torch.Tensor:
     """
     Bandwidth estimation using the median heuristic :cite:t:`Gretton2012`.
@@ -23,19 +22,14 @@ def sigma_median(
     dist
         Tensor with dimensions [Nx, Ny] when pairwise=True, containing the pairwise distances between `x` and `y`.
         Dimensions are [Nx, 1] when pairwise=False.
-    pairwise
-        Whether the distances are pairwise.
     Returns
     -------
     The computed bandwidth, `sigma`.
     """
-    if pairwise:
-        n = min(x.shape[0], y.shape[0])
-        n = n if (x[:n] == y[:n]).all() and x.shape == y.shape else 0
-        n_median = n + (np.prod(dist.shape) - n) // 2 - 1
-        sigma = (.5 * dist.flatten().sort().values[n_median].unsqueeze(dim=-1)) ** .5
-    else:
-        sigma = (.5 * dist.flatten().sort().values[dist.shape[0] // 2 - 1].unsqueeze(dim=-1)) ** .5
+    n = min(x.shape[0], y.shape[0])
+    n = n if (x[:n] == y[:n]).all() and x.shape == y.shape else 0
+    n_median = n + (np.prod(dist.shape) - n) // 2 - 1
+    sigma = (.5 * dist.flatten().sort().values[n_median].unsqueeze(dim=-1)) ** .5
     return sigma
 
 
@@ -93,7 +87,10 @@ def forward(
         if infer_sigma or self.init_required:
             if self.trainable and infer_sigma:
                 raise ValueError("Gradients cannot be computed w.r.t. an inferred sigma value")
-            sigma = self.init_sigma_fn(x, y, dist, pairwise)
+            if pairwise:
+                sigma = self.init_sigma_fn(x, y, dist)
+            else:
+                sigma = (.5 * dist.flatten().sort().values[dist.shape[0] // 2 - 1].unsqueeze(dim=-1)) ** .5
             with torch.no_grad():
                 self.log_sigma.copy_(sigma.log().clone())
             self.init_required = False
diff --git a/alibi_detect/utils/tensorflow/distance.py b/alibi_detect/utils/tensorflow/distance.py
index 18e473a9f..ddc66b55f 100644
--- a/alibi_detect/utils/tensorflow/distance.py
+++ b/alibi_detect/utils/tensorflow/distance.py
@@ -11,7 +11,7 @@ def squared_pairwise_distance(
     y: tf.Tensor,
     a_min: float = 1e-30,
     a_max: float = 1e30,
-    pairwise: bool = False
+    pairwise: bool = True
 ) -> tf.Tensor:
     """
     TensorFlow pairwise squared Euclidean distance between samples x and y.
diff --git a/alibi_detect/utils/tensorflow/kernels.py b/alibi_detect/utils/tensorflow/kernels.py
index 5d773ddf3..54aec9b78 100644
--- a/alibi_detect/utils/tensorflow/kernels.py
+++ b/alibi_detect/utils/tensorflow/kernels.py
@@ -8,8 +8,7 @@
 def sigma_median(
     x: tf.Tensor,
     y: tf.Tensor,
-    dist: tf.Tensor,
-    pairwise: bool
+    dist: tf.Tensor
 ) -> tf.Tensor:
     """
     Bandwidth estimation using the median heuristic :cite:t:`Gretton2012`.
@@ -22,19 +21,14 @@ def sigma_median(
         Tensor of instances with dimension [Ny, features].
     dist
         Tensor with dimensions [Nx, Ny], containing the pairwise distances between `x` and `y`.
-    pairwise
-        Whether the distances are pairwise.
     Returns
     -------
     The computed bandwidth, `sigma`.
     """
-    if pairwise:
-        n = min(x.shape[0], y.shape[0])
-        n = n if tf.reduce_all(x[:n] == y[:n]) and x.shape == y.shape else 0
-        n_median = n + (tf.math.reduce_prod(dist.shape) - n) // 2 - 1
-        sigma = tf.expand_dims((.5 * tf.sort(tf.reshape(dist, (-1,)))[n_median]) ** .5, axis=0)
-    else:
-        sigma = tf.expand_dims((.5 * tf.sort(tf.reshape(dist, (-1,)))[dist.shape[0] // 2 - 1]) ** .5, axis=0)
+    n = min(x.shape[0], y.shape[0])
+    n = n if tf.reduce_all(x[:n] == y[:n]) and x.shape == y.shape else 0
+    n_median = n + (tf.math.reduce_prod(dist.shape) - n) // 2 - 1
+    sigma = tf.expand_dims((.5 * tf.sort(tf.reshape(dist, (-1,)))[n_median]) ** .5, axis=0)
     return sigma
 
 
@@ -91,7 +85,10 @@ def call(
         if infer_sigma or self.init_required:
             if self.trainable and infer_sigma:
                 raise ValueError("Gradients cannot be computed w.r.t. an inferred sigma value")
-            sigma = self.init_sigma_fn(x, y, dist, pairwise)
+            if pairwise:
+                sigma = self.init_sigma_fn(x, y, dist)
+            else:
+                sigma = tf.expand_dims((.5 * tf.sort(tf.reshape(dist, (-1,)))[dist.shape[0] // 2 - 1]) ** .5, axis=0)
             self.log_sigma.assign(tf.math.log(sigma))
             self.init_required = False
 

From 3b96ad46995b47de87058f0eaf57ab2323c202d0 Mon Sep 17 00:00:00 2001
From: Ashley Scillitoe <ashley.scillitoe@seldon.io>
Date: Tue, 26 Jul 2022 15:40:41 +0100
Subject: [PATCH 13/14] Update tests and pydantic models

---
 alibi_detect/cd/mmd.py                   | 7 ++++---
 alibi_detect/saving/schemas.py           | 2 ++
 alibi_detect/saving/tests/test_saving.py | 7 +++++--
 doc/source/cd/methods/mmddrift.ipynb     | 2 +-
 4 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/alibi_detect/cd/mmd.py b/alibi_detect/cd/mmd.py
index 73cf34e41..1ceff50a9 100644
--- a/alibi_detect/cd/mmd.py
+++ b/alibi_detect/cd/mmd.py
@@ -4,6 +4,7 @@
 from alibi_detect.utils.frameworks import has_pytorch, has_tensorflow, BackendValidator
 from alibi_detect.utils.warnings import deprecated_alias
 from alibi_detect.base import DriftConfigMixin
+from alibi_detect.utils._types import Literal
 
 if has_pytorch:
     from alibi_detect.cd.pytorch.mmd import MMDDriftTorch, LinearTimeMMDDriftTorch
@@ -21,7 +22,7 @@ def __init__(
             x_ref: Union[np.ndarray, list],
             backend: str = 'tensorflow',
             p_val: float = .05,
-            estimator: str = 'quad',
+            estimator: Literal['quad', 'linear'] = 'quad',
             x_ref_preprocessed: bool = False,
             preprocess_at_init: bool = True,
             update_x_ref: Optional[Dict[str, int]] = None,
@@ -46,8 +47,8 @@ def __init__(
         p_val
             p-value used for the significance of the permutation test.
         estimator
-            Estimator used for the MMD^2 computation {'quad', 'linear'}. 'Quad' is the default and
-            uses the quadratic u-statistics on each square kernel matrix. 'Linear' uses the linear
+            Estimator used for the MMD^2 computation. 'quad' is the default and
+            uses the quadratic u-statistics on each square kernel matrix. 'linear' uses the linear
             time estimator as in Gretton et al. (JMLR 2014, sec 6), and the threshold is computed
             using the Gaussian asympotic distribution under null.
         x_ref_preprocessed
diff --git a/alibi_detect/saving/schemas.py b/alibi_detect/saving/schemas.py
index c80ce4db0..559d52c90 100644
--- a/alibi_detect/saving/schemas.py
+++ b/alibi_detect/saving/schemas.py
@@ -628,6 +628,7 @@ class MMDDriftConfig(DriftDetectorConfig):
     :class:`~alibi_detect.cd.MMDDrift` documentation for a description of each field.
     """
     p_val: float = .05
+    estimator: Literal['quad', 'linear'] = 'quad'
     preprocess_at_init: bool = True
     update_x_ref: Optional[Dict[str, int]] = None
     kernel: Optional[Union[str, KernelConfig]] = None
@@ -646,6 +647,7 @@ class MMDDriftConfigResolved(DriftDetectorConfigResolved):
     :class:`~alibi_detect.cd.MMDDrift` documentation for a description of each field.
     """
     p_val: float = .05
+    estimator: Literal['quad', 'linear'] = 'quad'
     preprocess_at_init: bool = True
     update_x_ref: Optional[Dict[str, int]] = None
     kernel: Optional[Callable] = None
diff --git a/alibi_detect/saving/tests/test_saving.py b/alibi_detect/saving/tests/test_saving.py
index e99d2a6f8..105f34b07 100644
--- a/alibi_detect/saving/tests/test_saving.py
+++ b/alibi_detect/saving/tests/test_saving.py
@@ -355,8 +355,9 @@ def test_save_cvmdrift(data, preprocess_custom, tmp_path):
         {'sigma': 0.5, 'trainable': False},  # pass kernel as object
     ], indirect=True
 )
+@parametrize('estimator', ['quad', 'linear'])
 @parametrize_with_cases("data", cases=ContinuousData, prefix='data_')
-def test_save_mmddrift(data, kernel, preprocess_custom, backend, tmp_path, seed):
+def test_save_mmddrift(data, kernel, estimator, preprocess_custom, backend, tmp_path, seed):
     """
     Test MMDDrift on continuous datasets, with UAE as preprocess_fn.
 
@@ -368,6 +369,7 @@ def test_save_mmddrift(data, kernel, preprocess_custom, backend, tmp_path, seed)
         cd = MMDDrift(X_ref,
                       p_val=P_VAL,
                       backend=backend,
+                      estimator=estimator,
                       preprocess_fn=preprocess_custom,
                       n_permutations=N_PERMUTATIONS,
                       preprocess_at_init=True,
@@ -386,7 +388,8 @@ def test_save_mmddrift(data, kernel, preprocess_custom, backend, tmp_path, seed)
     # assertions
     np.testing.assert_array_equal(preprocess_custom(X_ref), cd_load._detector.x_ref)
     assert not cd_load._detector.infer_sigma
-    assert cd_load._detector.n_permutations == N_PERMUTATIONS
+    if estimator == 'quad':
+        assert cd_load._detector.n_permutations == N_PERMUTATIONS
     assert cd_load._detector.p_val == P_VAL
     assert isinstance(cd_load._detector.preprocess_fn, Callable)
     assert cd_load._detector.preprocess_fn.func.__name__ == 'preprocess_drift'
diff --git a/doc/source/cd/methods/mmddrift.ipynb b/doc/source/cd/methods/mmddrift.ipynb
index 7b7ecf32e..99b22272d 100644
--- a/doc/source/cd/methods/mmddrift.ipynb
+++ b/doc/source/cd/methods/mmddrift.ipynb
@@ -48,7 +48,7 @@
     "\n",
     "* `p_val`: p-value used for significance of the permutation test.\n",
     "\n",
-    "* `estimator`: Estimator used for the MMD^2 computation {'quad', 'linear'}. 'Quad' is the default and uses the quadratic u-statistics on each square kernel matrix. 'Linear' uses the linear time estimator as in Gretton et al. (2014), and the threshold is computed using the Gaussian asympotic distribution under null.\n",
+    "* `estimator`: Estimator used for the MMD^2 computation. *'quad'* is the default and uses the quadratic u-statistics on each square kernel matrix. *'linear'* uses the linear time estimator as in Gretton et al. (2014), and the threshold is computed using the Gaussian asympotic distribution under null.\n",
     "\n",
     "* `preprocess_at_init`: Whether to already apply the (optional) preprocessing step to the reference data at initialization and store the preprocessed data. Dependent on the preprocessing step, this can reduce the computation time for the predict step significantly, especially when the reference dataset is large. Defaults to *True*. It is possible that it needs to be set to *False* if the preprocessing step requires statistics from both the reference and test data, such as the mean or standard deviation.\n",
     "\n",

From 95f3de4173c91e685aac9acde1405a392132c7e6 Mon Sep 17 00:00:00 2001
From: Hao Song <nuacesh@gmail.com>
Date: Wed, 27 Jul 2022 16:28:47 +0100
Subject: [PATCH 14/14] Added estimator to mmd tests, changed default
 permutation to none for base MMD class.

---
 alibi_detect/cd/base.py           |  2 +-
 alibi_detect/cd/mmd.py            |  1 +
 alibi_detect/cd/tests/test_mmd.py | 31 ++++++++++++++++++++++++-------
 3 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/alibi_detect/cd/base.py b/alibi_detect/cd/base.py
index 937a08c7d..9c1d925f2 100644
--- a/alibi_detect/cd/base.py
+++ b/alibi_detect/cd/base.py
@@ -506,7 +506,7 @@ def __init__(
             preprocess_fn: Optional[Callable] = None,
             sigma: Optional[np.ndarray] = None,
             configure_kernel_from_x_ref: bool = True,
-            n_permutations: int = 100,
+            n_permutations: int = None,
             input_shape: Optional[tuple] = None,
             data_type: Optional[str] = None
     ) -> None:
diff --git a/alibi_detect/cd/mmd.py b/alibi_detect/cd/mmd.py
index 1ceff50a9..38f8f10f8 100644
--- a/alibi_detect/cd/mmd.py
+++ b/alibi_detect/cd/mmd.py
@@ -88,6 +88,7 @@ def __init__(
         self._set_config(locals())
 
         backend = backend.lower()
+        estimator = estimator.lower()  # type: ignore
         BackendValidator(
             backend_options={'tensorflow': ['tensorflow'],
                              'pytorch': ['pytorch']},
diff --git a/alibi_detect/cd/tests/test_mmd.py b/alibi_detect/cd/tests/test_mmd.py
index 33e776e14..93adf70a1 100644
--- a/alibi_detect/cd/tests/test_mmd.py
+++ b/alibi_detect/cd/tests/test_mmd.py
@@ -1,12 +1,15 @@
+import itertools
 import numpy as np
 import pytest
 from alibi_detect.cd import MMDDrift
-from alibi_detect.cd.pytorch.mmd import MMDDriftTorch
-from alibi_detect.cd.tensorflow.mmd import MMDDriftTF
+from alibi_detect.cd.pytorch.mmd import MMDDriftTorch, LinearTimeMMDDriftTorch
+from alibi_detect.cd.tensorflow.mmd import MMDDriftTF, LinearTimeMMDDriftTF
 
 n, n_features = 100, 5
 
-tests_mmddrift = ['tensorflow', 'pytorch', 'PyToRcH', 'mxnet']
+tests_backend_mmddrift = ['tensorflow', 'pytorch', 'PyToRcH', 'mxnet']
+tests_estimator_mmddrift = ['quad', 'linear', 'Quad', 'Linear', 'cubic']
+tests_mmddrift = list(itertools.product(tests_backend_mmddrift, tests_estimator_mmddrift))
 n_tests = len(tests_mmddrift)
 
 
@@ -17,17 +20,31 @@ def mmddrift_params(request):
 
 @pytest.mark.parametrize('mmddrift_params', list(range(n_tests)), indirect=True)
 def test_mmddrift(mmddrift_params):
-    backend = mmddrift_params
+    backend, estimator = mmddrift_params
     x_ref = np.random.randn(*(n, n_features))
 
     try:
-        cd = MMDDrift(x_ref=x_ref, backend=backend)
+        cd = MMDDrift(x_ref=x_ref, backend=backend, estimator=estimator)
     except NotImplementedError:
         cd = None
 
     if backend.lower() == 'pytorch':
-        assert isinstance(cd._detector, MMDDriftTorch)
+        if estimator.lower() == 'quad':
+            assert isinstance(cd._detector, MMDDriftTorch)
+            assert isinstance(cd._detector.n_permutations, int)
+        elif estimator.lower() == 'linear':
+            assert isinstance(cd._detector, LinearTimeMMDDriftTorch)
+            assert hasattr(cd._detector, 'n_permutations')
+        else:
+            assert cd is None
     elif backend.lower() == 'tensorflow':
-        assert isinstance(cd._detector, MMDDriftTF)
+        if estimator.lower() == 'quad':
+            assert isinstance(cd._detector, MMDDriftTF)
+            assert isinstance(cd._detector.n_permutations, int)
+        elif estimator.lower() == 'linear':
+            assert isinstance(cd._detector, LinearTimeMMDDriftTF)
+            assert hasattr(cd._detector, 'n_permutations')
+        else:
+            assert cd is None
     else:
         assert cd is None