From bf922e1460539f0a630130e3c790fa353b6c6a68 Mon Sep 17 00:00:00 2001
From: Gokcen Eraslan <gokcen.eraslan@gmail.com>
Date: Mon, 2 Dec 2019 10:52:18 -0500
Subject: [PATCH 01/28] Add replace option to subsample.

---
 scanpy/preprocessing/_simple.py    | 5 ++++-
 scanpy/tests/test_preprocessing.py | 2 ++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/scanpy/preprocessing/_simple.py b/scanpy/preprocessing/_simple.py
index 9a9a23bb97..5a339d8dab 100644
--- a/scanpy/preprocessing/_simple.py
+++ b/scanpy/preprocessing/_simple.py
@@ -941,6 +941,7 @@ def subsample(
     n_obs: Optional[int] = None,
     random_state: Union[int, RandomState] = 0,
     copy: bool = False,
+    replace: bool = False,
 ) -> Optional[AnnData]:
     """\
     Subsample to a fraction of the number of observations.
@@ -959,6 +960,8 @@ def subsample(
     copy
         If an :class:`~anndata.AnnData` is passed,
         determines whether a copy is returned.
+    replace
+        If True, samples are drawn with replacement.
 
     Returns
     -------
@@ -979,7 +982,7 @@ def subsample(
         logg.debug(f'... subsampled to {new_n_obs} data points')
     else:
         raise ValueError('Either pass `n_obs` or `fraction`.')
-    obs_indices = np.random.choice(old_n_obs, size=new_n_obs, replace=False)
+    obs_indices = np.random.choice(old_n_obs, size=new_n_obs, replace=replace)
     if isinstance(data, AnnData):
         adata = data.copy() if copy else data
         adata._inplace_subset_obs(obs_indices)
diff --git a/scanpy/tests/test_preprocessing.py b/scanpy/tests/test_preprocessing.py
index 8c1add73a3..6496750568 100644
--- a/scanpy/tests/test_preprocessing.py
+++ b/scanpy/tests/test_preprocessing.py
@@ -99,6 +99,8 @@ def test_subsample():
     assert adata.n_obs == 40
     sc.pp.subsample(adata, fraction=0.1)
     assert adata.n_obs == 4
+    sc.pp.subsample(adata, n_obs=201, replace=True)
+    assert adata.n_obs == 201
 
 
 def test_scale():

From 671ec71fb76c2036aad3a771d1137d7b4677c455 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=B6k=C3=A7en=20Eraslan?= <gokcen.eraslan@gmail.com>
Date: Mon, 20 Apr 2020 16:34:36 -0400
Subject: [PATCH 02/28] Add sc.pp.sample with axis argument.

---
 scanpy/preprocessing/__init__.py   |   2 +-
 scanpy/preprocessing/_simple.py    | 105 +++++++++++++++++++++++------
 scanpy/tests/test_preprocessing.py |  31 +++++++--
 3 files changed, 109 insertions(+), 29 deletions(-)

diff --git a/scanpy/preprocessing/__init__.py b/scanpy/preprocessing/__init__.py
index 7c2c4d7aca..81241da595 100644
--- a/scanpy/preprocessing/__init__.py
+++ b/scanpy/preprocessing/__init__.py
@@ -2,7 +2,7 @@
 from ._simple import filter_cells, filter_genes
 from ._deprecated.highly_variable_genes import filter_genes_dispersion
 from ._highly_variable_genes import highly_variable_genes
-from ._simple import log1p, sqrt, scale, subsample
+from ._simple import log1p, sqrt, scale, subsample, sample
 from ._simple import normalize_per_cell, regress_out, downsample_counts
 from ._pca import pca
 from ._qc import calculate_qc_metrics
diff --git a/scanpy/preprocessing/_simple.py b/scanpy/preprocessing/_simple.py
index 2af4cff1c4..ad484f6f4f 100644
--- a/scanpy/preprocessing/_simple.py
+++ b/scanpy/preprocessing/_simple.py
@@ -743,16 +743,17 @@ def scale(
     return X if copy else None
 
 
-def subsample(
+def sample(
     data: Union[AnnData, np.ndarray, spmatrix],
     fraction: Optional[float] = None,
-    n_obs: Optional[int] = None,
+    n: Optional[int] = None,
     random_state: AnyRandom = 0,
     copy: bool = False,
     replace: bool = False,
+    axis: int = 0,
 ) -> Optional[AnnData]:
     """\
-    Subsample to a fraction of the number of observations.
+    Sample observations or variables with or without replacement.
 
     Parameters
     ----------
@@ -760,9 +761,10 @@ def subsample(
         The (annotated) data matrix of shape `n_obs` × `n_vars`.
         Rows correspond to cells and columns to genes.
     fraction
-        Subsample to this `fraction` of the number of observations.
-    n_obs
-        Subsample to this number of observations.
+        Subsample to this `fraction` of the number of observations or variables.
+        See `axis`.
+    n
+        Sample to this number of observations or variables. See `axis`.
     random_state
         Random seed to change subsampling.
     copy
@@ -770,35 +772,97 @@ def subsample(
         determines whether a copy is returned.
     replace
         If True, samples are drawn with replacement.
+    axis
+        Sample observations (axis=0) or variables (axis=1). Default is 0.
 
     Returns
     -------
-    Returns `X[obs_indices], obs_indices` if data is array-like, otherwise
-    subsamples the passed :class:`~anndata.AnnData` (`copy == False`) or
-    returns a subsampled copy of it (`copy == True`).
+    Returns `X[indices] or X[:, indices], indices` depending on the axis
+    argument if data is array-like, otherwise samples the passed
+    :class:`~anndata.AnnData` (`copy == False`) or returns a sampled
+    copy of it (`copy == True`).
     """
     np.random.seed(random_state)
-    old_n_obs = data.n_obs if isinstance(data, AnnData) else data.shape[0]
-    if n_obs is not None:
-        new_n_obs = n_obs
+    old_n = data.shape[axis]
+    if axis not in (0, 1):
+        raise ValueError(f'`axis` must be either 0 or 1.')
+    if fraction is None and n is None:
+        raise ValueError(f'Either `fraction` or `n` must be set.')
+    if fraction is not None and n is not None:
+        raise ValueError(f'Providing both `fraction` and `n` is not allowed.')
+    if n is not None:
+        new_n = n
     elif fraction is not None:
-        if fraction > 1 or fraction < 0:
+        if fraction < 0:
+            raise ValueError(f'`fraction needs to be nonnegative`, not {fraction}')
+        if not replace and fraction > 1:
             raise ValueError(
-                f'`fraction` needs to be within [0, 1], not {fraction}'
+                f'If replace=False, `fraction` needs to be within [0, 1], not {fraction}'
             )
-        new_n_obs = int(fraction * old_n_obs)
-        logg.debug(f'... subsampled to {new_n_obs} data points')
+        new_n = int(fraction * old_n)
+        obs_or_var_str = 'observations' if axis == 0 else 'variables'
+        logg.debug(f'... sampled to {new_n} {obs_or_var_str}')
     else:
         raise ValueError('Either pass `n_obs` or `fraction`.')
-    obs_indices = np.random.choice(old_n_obs, size=new_n_obs, replace=replace)
+    indices = np.random.choice(old_n, size=new_n, replace=replace)
     if isinstance(data, AnnData):
         if copy:
-            return data[obs_indices].copy()
+            view = data[indices] if axis == 0 else data[:, indices]
+            return view.copy()
         else:
-            data._inplace_subset_obs(obs_indices)
+            if axis == 0:
+                data._inplace_subset_obs(indices)
+            else:
+                data._inplace_subset_var(indices)
     else:
         X = data
-        return X[obs_indices], obs_indices
+        return X[indices] if axis == 0 else X[:, indices], indices
+
+
+def subsample(
+    data: Union[AnnData, np.ndarray, spmatrix],
+    fraction: Optional[float] = None,
+    n_obs: Optional[int] = None,
+    random_state: AnyRandom = 0,
+    copy: bool = False,
+) -> Optional[AnnData]:
+    """\
+    Subsample to a fraction of the number of observations.
+
+    .. warning::
+        .. deprecated:: 1.4.7
+            Use :func:`~scanpy.pp.sample` instead.
+
+    Parameters
+    ----------
+    data
+        The (annotated) data matrix of shape `n_obs` × `n_vars`.
+        Rows correspond to cells and columns to genes.
+    fraction
+        Subsample to this `fraction` of the number of observations.
+    n_obs
+        Subsample to this number of observations.
+    random_state
+        Random seed to change subsampling.
+    copy
+        If an :class:`~anndata.AnnData` is passed,
+        determines whether a copy is returned.
+
+    Returns
+    -------
+    Returns `X[obs_indices], obs_indices` if data is array-like, otherwise
+    subsamples the passed :class:`~anndata.AnnData` (`copy == False`) or
+    returns a subsampled copy of it (`copy == True`).
+    """
+    return sample(
+        data=data,
+        fraction=fraction,
+        n=n_obs,
+        random_state=random_state,
+        copy=copy,
+        replace=False,
+        axis=0,
+    )
 
 
 @deprecated_arg_names({"target_counts": "counts_per_cell"})
@@ -972,7 +1036,6 @@ def _downsample_array(
     return col
 
 
-
 def zscore_deprecated(X: np.ndarray) -> np.ndarray:
     """\
     Z-score standardize each variable/gene in X.
diff --git a/scanpy/tests/test_preprocessing.py b/scanpy/tests/test_preprocessing.py
index b6fce8b11d..e96ad35033 100644
--- a/scanpy/tests/test_preprocessing.py
+++ b/scanpy/tests/test_preprocessing.py
@@ -91,20 +91,37 @@ def test_normalize_per_cell():
         axis=1).A1.tolist()
 
 
-def test_subsample():
+def test_sample():
     adata = AnnData(np.ones((200, 10)))
-    sc.pp.subsample(adata, n_obs=40)
+    sc.pp.sample(adata, n=40)
     assert adata.n_obs == 40
-    sc.pp.subsample(adata, fraction=0.1)
+    sc.pp.sample(adata, fraction=0.1)
     assert adata.n_obs == 4
-    sc.pp.subsample(adata, n_obs=201, replace=True)
+    sc.pp.sample(adata, n=201, replace=True)
     assert adata.n_obs == 201
+    sc.pp.sample(adata, n=10, axis=1)
+    assert adata.n_vars == 10
+    sc.pp.sample(adata, n=11, axis=1, replace=True)
+    assert adata.n_vars == 11
+    sc.pp.sample(adata, fraction=2.0, axis=1, replace=True)
+    assert adata.n_vars == 22
 
+    adata = AnnData(sp.csr_matrix(np.ones((200, 10))))
+    sc.pp.sample(adata, fraction=2.0, axis=1, replace=True)
+    assert adata.n_vars == 20
 
-def test_subsample_copy():
+
+def test_sample_copy():
     adata = AnnData(np.ones((200, 10)))
-    assert sc.pp.subsample(adata, n_obs=40, copy=True).shape == (40, 10)
-    assert sc.pp.subsample(adata, fraction=0.1, copy=True).shape == (20, 10)
+    assert sc.pp.sample(adata, n=40, copy=True).shape == (40, 10)
+    assert sc.pp.sample(adata, fraction=0.1, copy=True).shape == (20, 10)
+    assert sc.pp.sample(adata, fraction=0.1, copy=True).shape == (20, 10)
+    X = sc.pp.sample(adata, fraction=2.0, axis=1, replace=True, copy=True)
+    assert X.shape == (200, 20)
+
+    adata = AnnData(sp.csr_matrix(np.ones((200, 10))))
+    X = sc.pp.sample(adata, fraction=2.0, axis=1, replace=True, copy=True)
+    assert X.shape == (200, 20)
 
 
 def test_scale():

From 9e0739bb103c7d6138c4ccf36ea5a8e92965de83 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=B6k=C3=A7en=20Eraslan?= <gokcen.eraslan@gmail.com>
Date: Mon, 20 Apr 2020 16:43:27 -0400
Subject: [PATCH 03/28] Fix fraction doc

---
 scanpy/preprocessing/_simple.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/scanpy/preprocessing/_simple.py b/scanpy/preprocessing/_simple.py
index ad484f6f4f..d02518f68d 100644
--- a/scanpy/preprocessing/_simple.py
+++ b/scanpy/preprocessing/_simple.py
@@ -761,8 +761,9 @@ def sample(
         The (annotated) data matrix of shape `n_obs` × `n_vars`.
         Rows correspond to cells and columns to genes.
     fraction
-        Subsample to this `fraction` of the number of observations or variables.
-        See `axis`.
+        Sample to this `fraction` of the number of observations or variables.
+        This can be larger than 1.0, if replace=True.
+        See `axis` and `replace`.
     n
         Sample to this number of observations or variables. See `axis`.
     random_state

From 8ec8cf38aa0a570f216e9f6a877dcc3da8ee02a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=B6k=C3=A7en=20Eraslan?= <gokcen.eraslan@gmail.com>
Date: Mon, 20 Apr 2020 16:46:28 -0400
Subject: [PATCH 04/28] Add to release notes

---
 docs/release-latest.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/release-latest.rst b/docs/release-latest.rst
index e568bd594e..ee52c332b3 100644
--- a/docs/release-latest.rst
+++ b/docs/release-latest.rst
@@ -4,6 +4,10 @@
 On master
 ~~~~~~~~~~
 
+.. rubric:: New functionality
+
+- :func:`~scanpy.pp.sample` supports both upsampling and downsampling of observations and variables. :func:`~scanpy.pp.subsample` is now deprecated.
+
 .. rubric:: Performance
 
 - :func:`~scanpy.pp.pca` now uses efficient implicit centering for sparse matrices. This can lead to signifigantly improved performance for large datasets :pr:`1066` :smaller:`A Tarashansky`

From fdf524afd3fb4b2476f3d8295502280dfb21702c Mon Sep 17 00:00:00 2001
From: "Philipp A." <flying-sheep@web.de>
Date: Thu, 14 Nov 2024 13:41:09 +0100
Subject: [PATCH 05/28] refactor

---
 src/scanpy/preprocessing/_simple.py | 130 ++++++++++++++++++----------
 1 file changed, 86 insertions(+), 44 deletions(-)

diff --git a/src/scanpy/preprocessing/_simple.py b/src/scanpy/preprocessing/_simple.py
index b5d76b323e..38e908d31a 100644
--- a/src/scanpy/preprocessing/_simple.py
+++ b/src/scanpy/preprocessing/_simple.py
@@ -8,7 +8,7 @@
 import warnings
 from functools import singledispatch
 from itertools import repeat
-from typing import TYPE_CHECKING, TypeVar
+from typing import TYPE_CHECKING, TypeVar, overload
 
 import numba
 import numpy as np
@@ -22,6 +22,7 @@
 from .._settings import settings as sett
 from .._utils import (
     _check_array_function_arguments,
+    _resolve_axis,
     axis_sum,
     is_backed_type,
     raise_not_implemented_error_if_backed_type,
@@ -33,7 +34,6 @@
 from ._distributed import materialize_as_ndarray
 from ._utils import _to_dense
 
-# install dask if available
 try:
     import dask.array as da
 except ImportError:
@@ -49,10 +49,13 @@
 
     import pandas as pd
     from numpy.typing import NDArray
+    from scipy.sparse import csc_matrix
 
     from .._compat import DaskArray
     from .._utils import AnyRandom
 
+    CSMatrix = csr_matrix | csc_matrix
+
 
 @old_positionals(
     "min_counts", "min_genes", "max_counts", "max_genes", "inplace", "copy"
@@ -825,16 +828,49 @@ def _regress_out_chunk(
     return np.vstack(responses_chunk_list)
 
 
+@overload
 def sample(
-    data: AnnData | np.ndarray | spmatrix,
+    data: AnnData,
+    fraction: float | None = None,
+    *,
+    n: int | None = None,
+    random_state: AnyRandom = 0,
+    copy: Literal[False] = False,
+    replace: bool = False,
+    axis: Literal["obs", 0, "var", 1] = "obs",
+) -> None: ...
+@overload
+def sample(
+    data: AnnData,
+    fraction: float | None = None,
+    *,
+    n: int | None = None,
+    random_state: AnyRandom = 0,
+    copy: Literal[True],
+    replace: bool = False,
+    axis: Literal["obs", 0, "var", 1] = "obs",
+) -> AnnData | None: ...
+@overload
+def sample(
+    data: np.ndarray | CSMatrix,
     fraction: float | None = None,
     *,
     n: int | None = None,
     random_state: AnyRandom = 0,
     copy: bool = False,
     replace: bool = False,
-    axis: int = 0,
-) -> AnnData | None:
+    axis: Literal["obs", 0, "var", 1] = "obs",
+) -> tuple[np.ndarray | CSMatrix, NDArray[np.int64]]: ...
+def sample(
+    data: AnnData | np.ndarray | CSMatrix,
+    fraction: float | None = None,
+    *,
+    n: int | None = None,
+    random_state: AnyRandom = 0,
+    copy: bool = False,
+    replace: bool = False,
+    axis: Literal["obs", 0, "var", 1] = "obs",
+) -> AnnData | None | tuple[np.ndarray | CSMatrix, NDArray[np.int64]]:
     """\
     Sample observations or variables with or without replacement.
 
@@ -845,7 +881,7 @@ def sample(
         Rows correspond to cells and columns to genes.
     fraction
         Sample to this `fraction` of the number of observations or variables.
-        This can be larger than 1.0, if replace=True.
+        This can be larger than 1.0, if `replace=True`.
         See `axis` and `replace`.
     n
         Sample to this number of observations or variables. See `axis`.
@@ -857,58 +893,64 @@ def sample(
     replace
         If True, samples are drawn with replacement.
     axis
-        Sample observations (axis=0) or variables (axis=1). Default is 0.
+        Sample `obs`\\ ervations (axis 0) or `var`\\ iables (axis 1).
 
     Returns
     -------
-    Returns `X[indices] or X[:, indices], indices` depending on the axis
-    argument if data is array-like, otherwise samples the passed
-    :class:`~anndata.AnnData` (`copy == False`) or returns a sampled
-    copy of it (`copy == True`).
+    If `isinstance(data, AnnData)` and `copy=False`,
+    this function returns `None`. Otherwise:
+
+    `data[indices, :]` | `data[:, indices]` (depending on `axis`)
+        If `data` is array-like or `copy=True`, returns the subset.
+    `indices` : numpy.ndarray
+        If `data` is array-like, also returns the indices into the original.
     """
-    np.random.seed(random_state)
+    axis, axis_name = _resolve_axis(axis)
+    match (fraction, n):
+        case (None, None):
+            msg = "Either `fraction` or `n` must be set."
+            raise TypeError(msg)
+        case (float(), int()):
+            msg = "Providing both `fraction` and `n` is not allowed."
+            raise TypeError(msg)
+
     old_n = data.shape[axis]
-    if axis not in (0, 1):
-        raise ValueError("`axis` must be either 0 or 1.")
-    if fraction is None and n is None:
-        raise ValueError("Either `fraction` or `n` must be set.")
-    if fraction is not None and n is not None:
-        raise ValueError("Providing both `fraction` and `n` is not allowed.")
     if n is not None:
         new_n = n
     elif fraction is not None:
         if fraction < 0:
-            raise ValueError(f"`fraction needs to be nonnegative`, not {fraction}")
+            msg = f"fraction needs to be nonnegative, not {fraction}"
+            raise ValueError(msg)
         if not replace and fraction > 1:
-            raise ValueError(
-                f"If replace=False, `fraction` needs to be within [0, 1], not {fraction}"
-            )
+            msg = f"If replace=False, `fraction` needs to be within [0, 1], not {fraction}"
+            raise ValueError(msg)
         new_n = int(fraction * old_n)
-        obs_or_var_str = "observations" if axis == 0 else "variables"
-        logg.debug(f"... sampled to {new_n} {obs_or_var_str}")
+        logg.debug(f"... sampled to {new_n} {axis_name}")
     else:
-        raise ValueError("Either pass `n_obs` or `fraction`.")
+        msg = "Either pass `n_obs` or `fraction`."
+        raise ValueError(msg)
+
+    np.random.seed(random_state)
     indices = np.random.choice(old_n, size=new_n, replace=replace)
-    if isinstance(data, AnnData):
-        if data.isbacked:
-            if copy:
-                view = data[indices] if axis == 0 else data[:, indices]
-                return view.to_memory()
-            else:
-                raise NotImplementedError(
-                    "Inplace sampling is not implemented for backed objects."
-                )
-        else:
-            if copy:
-                view = data[indices] if axis == 0 else data[:, indices]
-                return view.copy()
-            elif axis == 0:
-                data._inplace_subset_obs(indices)
-            else:
-                data._inplace_subset_var(indices)
+    subset = data[indices] if axis_name == "obs" else data[:, indices]
+
+    if not isinstance(data, AnnData):
+        assert not isinstance(subset, AnnData)
+        if copy:
+            subset = subset.copy()
+        return subset, indices
+    assert isinstance(subset, AnnData)
+    if copy:
+        return subset.to_memory() if data.isbacked else subset.copy()
+
+    # in-place
+    if data.isbacked:
+        msg = "Inplace sampling (`copy=False`) is not implemented for backed objects."
+        raise NotImplementedError(msg)
+    if axis_name == "obs":
+        data._inplace_subset_obs(indices)
     else:
-        X = data
-        return X[indices] if axis == 0 else X[:, indices], indices
+        data._inplace_subset_var(indices)
 
 
 @old_positionals("n_obs", "random_state", "copy")

From 061a19d4682118691684321272cbf9d1114c8038 Mon Sep 17 00:00:00 2001
From: "Philipp A." <flying-sheep@web.de>
Date: Thu, 14 Nov 2024 14:16:41 +0100
Subject: [PATCH 06/28] Refactor tests

---
 src/scanpy/preprocessing/_simple.py |  8 +--
 tests/test_preprocessing.py         | 96 +++++++++++++++++------------
 2 files changed, 62 insertions(+), 42 deletions(-)

diff --git a/src/scanpy/preprocessing/_simple.py b/src/scanpy/preprocessing/_simple.py
index 38e908d31a..bc4f50b201 100644
--- a/src/scanpy/preprocessing/_simple.py
+++ b/src/scanpy/preprocessing/_simple.py
@@ -849,7 +849,7 @@ def sample(
     copy: Literal[True],
     replace: bool = False,
     axis: Literal["obs", 0, "var", 1] = "obs",
-) -> AnnData | None: ...
+) -> AnnData: ...
 @overload
 def sample(
     data: np.ndarray | CSMatrix,
@@ -927,7 +927,7 @@ def sample(
         new_n = int(fraction * old_n)
         logg.debug(f"... sampled to {new_n} {axis_name}")
     else:
-        msg = "Either pass `n_obs` or `fraction`."
+        msg = "Either pass `n` or `fraction`."
         raise ValueError(msg)
 
     np.random.seed(random_state)
@@ -955,13 +955,13 @@ def sample(
 
 @old_positionals("n_obs", "random_state", "copy")
 def subsample(
-    data: AnnData | np.ndarray | spmatrix,
+    data: AnnData | np.ndarray | CSMatrix,
     fraction: float | None = None,
     *,
     n_obs: int | None = None,
     random_state: AnyRandom = 0,
     copy: bool = False,
-) -> AnnData | tuple[np.ndarray | spmatrix, NDArray[np.int64]] | None:
+) -> AnnData | tuple[np.ndarray | CSMatrix, NDArray[np.int64]] | None:
     """\
     Subsample to a fraction of the number of observations.
 
diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py
index 0939e3e74c..1354777ed5 100644
--- a/tests/test_preprocessing.py
+++ b/tests/test_preprocessing.py
@@ -3,6 +3,7 @@
 import warnings
 from itertools import product
 from pathlib import Path
+from typing import TYPE_CHECKING
 
 import numpy as np
 import pandas as pd
@@ -23,6 +24,13 @@
 from testing.scanpy._helpers.data import pbmc3k, pbmc68k_reduced
 from testing.scanpy._pytest.params import ARRAY_TYPES
 
+if TYPE_CHECKING:
+    from collections.abc import Callable
+    from typing import Literal
+
+    CSMatrix = sp.csc_matrix | sp.csr_matrix
+
+
 HERE = Path(__file__).parent
 DATA_PATH = HERE / "_data"
 
@@ -135,52 +143,64 @@ def test_normalize_per_cell():
     assert adata.X.sum(axis=1).tolist() == adata_sparse.X.sum(axis=1).A1.tolist()
 
 
-def test_sample():
-    warnings.filterwarnings("ignore", r".*names are not unique", UserWarning)
-    adata = AnnData(np.ones((200, 10)))
-    sc.pp.sample(adata, n=40)
-    assert adata.n_obs == 40
-    sc.pp.sample(adata, fraction=0.1)
-    assert adata.n_obs == 4
-    sc.pp.sample(adata, n=201, replace=True)
-    assert adata.n_obs == 201
-    sc.pp.sample(adata, n=10, axis=1)
-    assert adata.n_vars == 10
-    sc.pp.sample(adata, n=11, axis=1, replace=True)
-    assert adata.n_vars == 11
-    sc.pp.sample(adata, fraction=2.0, axis=1, replace=True)
-    assert adata.n_vars == 22
-
-    adata = AnnData(sp.csr_matrix(np.ones((200, 10))))
-    sc.pp.sample(adata, fraction=2.0, axis=1, replace=True)
-    assert adata.n_vars == 20
-
-
-def test_sample_copy():
-    adata = AnnData(np.ones((200, 10)))
-    assert sc.pp.sample(adata, n=40, copy=True).shape == (40, 10)
-    assert sc.pp.sample(adata, fraction=0.1, copy=True).shape == (20, 10)
-    assert sc.pp.sample(adata, fraction=0.1, copy=True).shape == (20, 10)
-    X = sc.pp.sample(adata, fraction=2.0, axis=1, replace=True, copy=True)
-    assert X.shape == (200, 20)
-
-    adata = AnnData(sp.csr_matrix(np.ones((200, 10))))
-    X = sc.pp.sample(adata, fraction=2.0, axis=1, replace=True, copy=True)
-    assert X.shape == (200, 20)
+@pytest.mark.parametrize("array_type", ARRAY_TYPES)
+@pytest.mark.parametrize("copy", [True, False], ids=["copy", "inplace"])
+@pytest.mark.parametrize(
+    ("axis", "fraction", "n", "replace", "expected"),
+    [
+        pytest.param(0, None, 40, False, 40, id="obs-40-no_replace"),
+        pytest.param(0, 0.1, None, False, 20, id="obs-0.1-no_replace"),
+        pytest.param(0, None, 201, True, 201, id="obs-201-replace"),
+        pytest.param(1, None, 10, False, 10, id="var-10-no_replace"),
+        pytest.param(1, None, 11, True, 11, id="var-11-replace"),
+        pytest.param(1, 2.0, None, True, 20, id="var-2.0-replace"),
+    ],
+)
+@pytest.mark.filterwarnings("ignore::dask.array.PerformanceWarning")
+def test_sample(
+    *,
+    array_type: Callable[[np.ndarray], np.ndarray | CSMatrix],
+    copy: bool,
+    axis: Literal[0, 1],
+    fraction: float | None,
+    n: int | None,
+    replace: bool,
+    expected: int,
+):
+    adata = AnnData(array_type(np.ones((200, 10))))
+
+    # can’t guarantee that duplicates are drawn when `replace=True`,
+    # so we just ignore the warning instead of using `with pytest.warns(...)`
+    warnings.filterwarnings(
+        "ignore" if replace else "error", r".*names are not unique", UserWarning
+    )
+    rv = sc.pp.sample(adata, fraction, n=n, replace=replace, axis=axis, copy=copy)
+
+    if copy:
+        assert adata.shape == (200, 10)
+        subset = rv
+    else:
+        assert rv is None
+        subset = adata
+
+    assert subset.shape == ((expected, 10) if axis == 0 else (200, expected))
 
 
 def test_sample_copy_backed(tmp_path):
-    A = np.random.rand(200, 10).astype(np.float32)
-    adata_m = AnnData(A.copy())
-    adata_d = AnnData(A.copy())
-    filename = tmp_path / "test.h5ad"
-    adata_d.filename = filename
-    # This should not throw an error
+    adata_m = AnnData(np.random.rand(200, 10).astype(np.float32))
+    adata_d = adata_m.copy()
+    adata_d.filename = tmp_path / "test.h5ad"
+
     assert sc.pp.sample(adata_d, n=40, copy=True).shape == (40, 10)
     np.testing.assert_array_equal(
         sc.pp.sample(adata_m, n=40, copy=True).X,
         sc.pp.sample(adata_d, n=40, copy=True).X,
     )
+
+
+def test_sample_copy_backed_error(tmp_path):
+    adata_d = AnnData(np.random.rand(200, 10).astype(np.float32))
+    adata_d.filename = tmp_path / "test.h5ad"
     with pytest.raises(NotImplementedError):
         sc.pp.sample(adata_d, n=40, copy=False)
 

From 06d428006798727a3b3ea3fa97f2ddce916b7812 Mon Sep 17 00:00:00 2001
From: "Philipp A." <flying-sheep@web.de>
Date: Thu, 14 Nov 2024 14:36:05 +0100
Subject: [PATCH 07/28] handle array case in test

---
 tests/test_preprocessing.py | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py
index 1354777ed5..36c1726333 100644
--- a/tests/test_preprocessing.py
+++ b/tests/test_preprocessing.py
@@ -144,7 +144,7 @@ def test_normalize_per_cell():
 
 
 @pytest.mark.parametrize("array_type", ARRAY_TYPES)
-@pytest.mark.parametrize("copy", [True, False], ids=["copy", "inplace"])
+@pytest.mark.parametrize("which", ["copy", "inplace", "array"])
 @pytest.mark.parametrize(
     ("axis", "fraction", "n", "replace", "expected"),
     [
@@ -160,7 +160,7 @@ def test_normalize_per_cell():
 def test_sample(
     *,
     array_type: Callable[[np.ndarray], np.ndarray | CSMatrix],
-    copy: bool,
+    which: Literal["copy", "inplace", "array"],
     axis: Literal[0, 1],
     fraction: float | None,
     n: int | None,
@@ -174,14 +174,30 @@ def test_sample(
     warnings.filterwarnings(
         "ignore" if replace else "error", r".*names are not unique", UserWarning
     )
-    rv = sc.pp.sample(adata, fraction, n=n, replace=replace, axis=axis, copy=copy)
+    rv = sc.pp.sample(
+        adata.X if which == "array" else adata,
+        fraction,
+        n=n,
+        replace=replace,
+        axis=axis,
+        # `copy` only affects AnnData inputs
+        copy=dict(copy=True, inplace=False, array=False)[which],
+    )
 
-    if copy:
-        assert adata.shape == (200, 10)
-        subset = rv
-    else:
-        assert rv is None
-        subset = adata
+    match which:
+        case "copy":
+            subset = rv
+            assert rv is not adata
+            assert adata.shape == (200, 10)
+        case "inplace":
+            subset = adata
+            assert rv is None
+        case "array":
+            subset, indices = rv
+            assert len(indices) == expected
+            assert adata.shape == (200, 10)
+        case _:
+            pytest.fail(f"Unknown `{which=}`")
 
     assert subset.shape == ((expected, 10) if axis == 0 else (200, expected))
 

From 6eeab2ea641a95a4506b39da3768c22f899d3ee5 Mon Sep 17 00:00:00 2001
From: "Philipp A." <flying-sheep@web.de>
Date: Thu, 14 Nov 2024 14:58:49 +0100
Subject: [PATCH 08/28] Test errors

---
 src/scanpy/preprocessing/_simple.py | 33 +++++++++++++---------------
 tests/test_preprocessing.py         | 34 ++++++++++++++++++++++++++++-
 2 files changed, 48 insertions(+), 19 deletions(-)

diff --git a/src/scanpy/preprocessing/_simple.py b/src/scanpy/preprocessing/_simple.py
index bc4f50b201..26f025ad95 100644
--- a/src/scanpy/preprocessing/_simple.py
+++ b/src/scanpy/preprocessing/_simple.py
@@ -906,32 +906,29 @@ def sample(
         If `data` is array-like, also returns the indices into the original.
     """
     axis, axis_name = _resolve_axis(axis)
+    old_n = data.shape[axis]
     match (fraction, n):
         case (None, None):
             msg = "Either `fraction` or `n` must be set."
             raise TypeError(msg)
-        case (float(), int()):
+        case (None, _):
+            pass
+        case (_, None):
+            if fraction < 0:
+                msg = f"`{fraction=}` needs to be nonnegative."
+                raise ValueError(msg)
+            if not replace and fraction > 1:
+                msg = f"If `replace=False`, `{fraction=}` needs to be within [0, 1]."
+                raise ValueError(msg)
+            n = int(fraction * old_n)
+            logg.debug(f"... sampled to {n} {axis_name}")
+        case _:
             msg = "Providing both `fraction` and `n` is not allowed."
             raise TypeError(msg)
-
-    old_n = data.shape[axis]
-    if n is not None:
-        new_n = n
-    elif fraction is not None:
-        if fraction < 0:
-            msg = f"fraction needs to be nonnegative, not {fraction}"
-            raise ValueError(msg)
-        if not replace and fraction > 1:
-            msg = f"If replace=False, `fraction` needs to be within [0, 1], not {fraction}"
-            raise ValueError(msg)
-        new_n = int(fraction * old_n)
-        logg.debug(f"... sampled to {new_n} {axis_name}")
-    else:
-        msg = "Either pass `n` or `fraction`."
-        raise ValueError(msg)
+    del fraction
 
     np.random.seed(random_state)
-    indices = np.random.choice(old_n, size=new_n, replace=replace)
+    indices = np.random.choice(old_n, size=n, replace=replace)
     subset = data[indices] if axis_name == "obs" else data[:, indices]
 
     if not isinstance(data, AnnData):
diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py
index 36c1726333..3a5be7d257 100644
--- a/tests/test_preprocessing.py
+++ b/tests/test_preprocessing.py
@@ -26,7 +26,7 @@
 
 if TYPE_CHECKING:
     from collections.abc import Callable
-    from typing import Literal
+    from typing import Any, Literal
 
     CSMatrix = sp.csc_matrix | sp.csr_matrix
 
@@ -202,6 +202,38 @@ def test_sample(
     assert subset.shape == ((expected, 10) if axis == 0 else (200, expected))
 
 
+@pytest.mark.parametrize(
+    ("args", "exc", "pattern"),
+    [
+        pytest.param(
+            dict(), TypeError, r"Either `fraction` or `n` must be set", id="empty"
+        ),
+        pytest.param(
+            dict(n=10, fraction=0.2),
+            TypeError,
+            r"Providing both `fraction` and `n` is not allowed",
+            id="both",
+        ),
+        pytest.param(
+            dict(fraction=2),
+            ValueError,
+            r"If `replace=False`, `fraction=2` needs to be",
+            id="frac>1",
+        ),
+        pytest.param(
+            dict(fraction=-0.3),
+            ValueError,
+            r"`fraction=-0\.3` needs to be nonnegative",
+            id="frac<0",
+        ),
+    ],
+)
+def test_sample_error(args: dict[str, Any], exc: type[Exception], pattern: str):
+    adata = AnnData(np.ones((200, 10)))
+    with pytest.raises(exc, match=pattern):
+        sc.pp.sample(adata, **args)
+
+
 def test_sample_copy_backed(tmp_path):
     adata_m = AnnData(np.random.rand(200, 10).astype(np.float32))
     adata_d = adata_m.copy()

From b1f50610f41c937f0097f9719d603f67f449c904 Mon Sep 17 00:00:00 2001
From: "Philipp A." <flying-sheep@web.de>
Date: Thu, 14 Nov 2024 15:02:57 +0100
Subject: [PATCH 09/28] prettier deprecations

---
 .../_deprecated/highly_variable_genes.py      | 22 ++++++++---------
 src/scanpy/preprocessing/_simple.py           | 24 +++++++++----------
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py b/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py
index f2c3ce971b..e322013d47 100644
--- a/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py
+++ b/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py
@@ -48,18 +48,18 @@ def filter_genes_dispersion(
     """\
     Extract highly variable genes :cite:p:`Satija2015,Zheng2017`.
 
-    .. warning::
-        .. deprecated:: 1.3.6
-            Use :func:`~scanpy.pp.highly_variable_genes`
-            instead. The new function is equivalent to the present
-            function, except that
+    .. deprecated:: 1.3.6
 
-            * the new function always expects logarithmized data
-            * `subset=False` in the new function, it suffices to
-              merely annotate the genes, tools like `pp.pca` will
-              detect the annotation
-            * you can now call: `sc.pl.highly_variable_genes(adata)`
-            * `copy` is replaced by `inplace`
+       Use :func:`~scanpy.pp.highly_variable_genes`
+       instead. The new function is equivalent to the present
+       function, except that
+
+       * the new function always expects logarithmized data
+       * `subset=False` in the new function, it suffices to
+         merely annotate the genes, tools like `pp.pca` will
+         detect the annotation
+       * you can now call: `sc.pl.highly_variable_genes(adata)`
+       * `copy` is replaced by `inplace`
 
     If trying out parameters, pass the data matrix instead of AnnData.
 
diff --git a/src/scanpy/preprocessing/_simple.py b/src/scanpy/preprocessing/_simple.py
index 26f025ad95..e578cfdb90 100644
--- a/src/scanpy/preprocessing/_simple.py
+++ b/src/scanpy/preprocessing/_simple.py
@@ -501,16 +501,16 @@ def normalize_per_cell(
     """\
     Normalize total counts per cell.
 
-    .. warning::
-        .. deprecated:: 1.3.7
-            Use :func:`~scanpy.pp.normalize_total` instead.
-            The new function is equivalent to the present
-            function, except that
+    .. deprecated:: 1.3.7
 
-            * the new function doesn't filter cells based on `min_counts`,
-              use :func:`~scanpy.pp.filter_cells` if filtering is needed.
-            * some arguments were renamed
-            * `copy` is replaced by `inplace`
+       Use :func:`~scanpy.pp.normalize_total` instead.
+       The new function is equivalent to the present
+       function, except that
+
+       * the new function doesn't filter cells based on `min_counts`,
+         use :func:`~scanpy.pp.filter_cells` if filtering is needed.
+       * some arguments were renamed
+       * `copy` is replaced by `inplace`
 
     Normalize each cell by total counts over all genes, so that every cell has
     the same total count after normalization.
@@ -962,9 +962,9 @@ def subsample(
     """\
     Subsample to a fraction of the number of observations.
 
-    .. warning::
-        .. deprecated:: 1.4.7
-            Use :func:`~scanpy.pp.sample` instead.
+    .. deprecated:: 1.11.0
+
+       Use :func:`~scanpy.pp.sample` instead.
 
     Parameters
     ----------

From cec8affe0c1fe881bad146846c4ba856081f6030 Mon Sep 17 00:00:00 2001
From: "Philipp A." <flying-sheep@web.de>
Date: Thu, 14 Nov 2024 15:13:21 +0100
Subject: [PATCH 10/28] docs: list pp.sample in the API docs, move pp.subsample to deprecated

---
 docs/api/deprecated.md    | 1 +
 docs/api/preprocessing.md | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/api/deprecated.md b/docs/api/deprecated.md
index 4511f4b3a7..d09c1af405 100644
--- a/docs/api/deprecated.md
+++ b/docs/api/deprecated.md
@@ -11,4 +11,5 @@
 
    pp.filter_genes_dispersion
    pp.normalize_per_cell
+   pp.subsample
 ```
diff --git a/docs/api/preprocessing.md b/docs/api/preprocessing.md
index 4b17567a6b..36e732a6dc 100644
--- a/docs/api/preprocessing.md
+++ b/docs/api/preprocessing.md
@@ -31,7 +31,7 @@ For visual quality control, see {func}`~scanpy.pl.highest_expr_genes` and
    pp.normalize_total
    pp.regress_out
    pp.scale
-   pp.subsample
+   pp.sample
    pp.downsample_counts
 ```
 

From daa147ef681483ba5bb0808f5da0724ff10a5ce9 Mon Sep 17 00:00:00 2001
From: "Philipp A." <flying-sheep@web.de>
Date: Thu, 14 Nov 2024 15:38:16 +0100
Subject: [PATCH 11/28] ignore dask warning correctly

---
 tests/test_preprocessing.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py
index 3a5be7d257..e168f66e14 100644
--- a/tests/test_preprocessing.py
+++ b/tests/test_preprocessing.py
@@ -22,7 +22,7 @@
     maybe_dask_process_context,
 )
 from testing.scanpy._helpers.data import pbmc3k, pbmc68k_reduced
-from testing.scanpy._pytest.params import ARRAY_TYPES
+from testing.scanpy._pytest.params import ARRAY_TYPES, param_with
 
 if TYPE_CHECKING:
     from collections.abc import Callable
@@ -143,7 +143,16 @@ def test_normalize_per_cell():
     assert adata.X.sum(axis=1).tolist() == adata_sparse.X.sum(axis=1).A1.tolist()
 
 
-@pytest.mark.parametrize("array_type", ARRAY_TYPES)
+ignore_dask_perf = pytest.mark.filterwarnings("ignore::dask.array.PerformanceWarning")
+
+
+@pytest.mark.parametrize(
+    "array_type",
+    [
+        param_with(p, marks=[ignore_dask_perf] if "dask" in (p.id or "") else [])
+        for p in ARRAY_TYPES
+    ],
+)
 @pytest.mark.parametrize("which", ["copy", "inplace", "array"])
 @pytest.mark.parametrize(
     ("axis", "fraction", "n", "replace", "expected"),
@@ -156,7 +165,6 @@ def test_normalize_per_cell():
         pytest.param(1, 2.0, None, True, 20, id="var-2.0-replace"),
     ],
 )
-@pytest.mark.filterwarnings("ignore::dask.array.PerformanceWarning")
 def test_sample(
     *,
     array_type: Callable[[np.ndarray], np.ndarray | CSMatrix],

From 3c31abd898f816b61c9e7b867d8130929e5e50d3 Mon Sep 17 00:00:00 2001
From: "Philipp A." <flying-sheep@web.de>
Date: Thu, 14 Nov 2024 15:43:06 +0100
Subject: [PATCH 12/28] Add copy-signature test exception for sc.pp.sample

---
 tests/test_package_structure.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_package_structure.py b/tests/test_package_structure.py
index 834c06d8b4..3541c561a5 100644
--- a/tests/test_package_structure.py
+++ b/tests/test_package_structure.py
@@ -138,6 +138,7 @@ class ExpectedSig(TypedDict):
 copy_sigs["sc.pp.filter_cells"] = None  # unclear `inplace` situation
 copy_sigs["sc.pp.filter_genes"] = None  # unclear `inplace` situation
 copy_sigs["sc.pp.subsample"] = None  # returns indices along matrix
+copy_sigs["sc.pp.sample"] = None  # returns indices along matrix
 # partial exceptions: “data” instead of “adata”
 copy_sigs["sc.pp.log1p"]["first_name"] = "data"
 copy_sigs["sc.pp.normalize_per_cell"]["first_name"] = "data"

From d3504114d2d36580b08ee7b0dc794ab7d0b24f1b Mon Sep 17 00:00:00 2001
From: "Philipp A." <flying-sheep@web.de>
Date: Mon, 18 Nov 2024 16:57:27 +0100
Subject: [PATCH 13/28] WIP: switch sample() from random_state to rng, add _legacy_numpy_gen

---
 src/scanpy/_compat.py               | 22 ++++++++++++++++++++++
 src/scanpy/preprocessing/_simple.py | 27 ++++++++++++++-------------
 tests/test_utils.py                 | 21 ++++++++++++++++++++-
 3 files changed, 56 insertions(+), 14 deletions(-)

diff --git a/src/scanpy/_compat.py b/src/scanpy/_compat.py
index c5fa4dbe84..d3de7b73ef 100644
--- a/src/scanpy/_compat.py
+++ b/src/scanpy/_compat.py
@@ -9,12 +9,14 @@
 from pathlib import Path
 from typing import TYPE_CHECKING, Literal, ParamSpec, TypeVar, cast, overload
 
+import numpy as np
 from packaging.version import Version
 
 if TYPE_CHECKING:
     from collections.abc import Callable
     from importlib.metadata import PackageMetadata
 
+
 P = ParamSpec("P")
 R = TypeVar("R")
 
@@ -194,3 +196,23 @@ def _numba_threading_layer() -> Layer:
         f" ({available=}, {numba.config.THREADING_LAYER_PRIORITY=})"
     )
     raise ValueError(msg)
+
+
+_LegacyRandom = int | np.random.RandomState | None
+
+
+def _legacy_numpy_gen(
+    random_state: _LegacyRandom | None = None,
+) -> np.random.RandomState:
+    """Return a random generator that behaves like the legacy one."""
+
+    if random_state is not None:
+        if isinstance(random_state, np.random.RandomState):
+            np.random.set_state(random_state.get_state(legacy=False))
+            return random_state
+        np.random.seed(random_state)
+    state = np.random.get_state(legacy=True)
+    assert isinstance(state, tuple)
+    bit_gen = np.random.MT19937()
+    bit_gen.state = state
+    return np.random.RandomState(bit_gen)
diff --git a/src/scanpy/preprocessing/_simple.py b/src/scanpy/preprocessing/_simple.py
index e578cfdb90..9ec518f412 100644
--- a/src/scanpy/preprocessing/_simple.py
+++ b/src/scanpy/preprocessing/_simple.py
@@ -18,7 +18,7 @@
 from sklearn.utils import check_array, sparsefuncs
 
 from .. import logging as logg
-from .._compat import njit, old_positionals
+from .._compat import _legacy_numpy_gen, njit, old_positionals
 from .._settings import settings as sett
 from .._utils import (
     _check_array_function_arguments,
@@ -51,8 +51,8 @@
     from numpy.typing import NDArray
     from scipy.sparse import csc_matrix
 
-    from .._compat import DaskArray
-    from .._utils import AnyRandom
+    from .._compat import DaskArray, _LegacyRandom
+    from .._utils import RNGLike, SeedLike
 
     CSMatrix = csr_matrix | csc_matrix
 
@@ -834,7 +834,7 @@ def sample(
     fraction: float | None = None,
     *,
     n: int | None = None,
-    random_state: AnyRandom = 0,
+    rng: RNGLike | SeedLike | None = 0,
     copy: Literal[False] = False,
     replace: bool = False,
     axis: Literal["obs", 0, "var", 1] = "obs",
@@ -845,7 +845,7 @@ def sample(
     fraction: float | None = None,
     *,
     n: int | None = None,
-    random_state: AnyRandom = 0,
+    rng: RNGLike | SeedLike | None = None,
     copy: Literal[True],
     replace: bool = False,
     axis: Literal["obs", 0, "var", 1] = "obs",
@@ -856,7 +856,7 @@ def sample(
     fraction: float | None = None,
     *,
     n: int | None = None,
-    random_state: AnyRandom = 0,
+    rng: RNGLike | SeedLike | None = None,
     copy: bool = False,
     replace: bool = False,
     axis: Literal["obs", 0, "var", 1] = "obs",
@@ -866,7 +866,7 @@ def sample(
     fraction: float | None = None,
     *,
     n: int | None = None,
-    random_state: AnyRandom = 0,
+    rng: RNGLike | SeedLike | None = None,
     copy: bool = False,
     replace: bool = False,
     axis: Literal["obs", 0, "var", 1] = "obs",
@@ -927,8 +927,9 @@ def sample(
             raise TypeError(msg)
     del fraction
 
-    np.random.seed(random_state)
-    indices = np.random.choice(old_n, size=n, replace=replace)
+    if not isinstance(rng, np.random.RandomState):
+        rng = np.random.default_rng(rng)
+    indices = rng.choice(old_n, size=n, replace=replace)
     subset = data[indices] if axis_name == "obs" else data[:, indices]
 
     if not isinstance(data, AnnData):
@@ -956,7 +957,7 @@ def subsample(
     fraction: float | None = None,
     *,
     n_obs: int | None = None,
-    random_state: AnyRandom = 0,
+    random_state: _LegacyRandom = 0,
     copy: bool = False,
 ) -> AnnData | tuple[np.ndarray | CSMatrix, NDArray[np.int64]] | None:
     """\
@@ -991,7 +992,7 @@ def subsample(
         data=data,
         fraction=fraction,
         n=n_obs,
-        random_state=random_state,
+        rng=_legacy_numpy_gen(random_state),
         copy=copy,
         replace=False,
         axis=0,
@@ -1004,7 +1005,7 @@ def downsample_counts(
     counts_per_cell: int | Collection[int] | None = None,
     total_counts: int | None = None,
     *,
-    random_state: AnyRandom = 0,
+    random_state: _LegacyRandom = 0,
     replace: bool = False,
     copy: bool = False,
 ) -> AnnData | None:
@@ -1140,7 +1141,7 @@ def _downsample_array(
     col: np.ndarray,
     target: int,
     *,
-    random_state: AnyRandom = 0,
+    random_state: _LegacyRandom = 0,
     replace: bool = True,
     inplace: bool = False,
 ):
diff --git a/tests/test_utils.py b/tests/test_utils.py
index f8a38a5f9d..aebd6b7ec5 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -9,7 +9,7 @@
 from packaging.version import Version
 from scipy.sparse import csr_matrix, issparse
 
-from scanpy._compat import DaskArray, pkg_version
+from scanpy._compat import DaskArray, _legacy_numpy_gen, pkg_version
 from scanpy._utils import (
     axis_mul_or_truediv,
     axis_sum,
@@ -247,3 +247,22 @@ def test_is_constant_dask(request: pytest.FixtureRequest, axis, expected, block_
     x = da.from_array(np.array(x_data), chunks=2).map_blocks(block_type)
     result = is_constant(x, axis=axis).compute()
     np.testing.assert_array_equal(expected, result)
+
+
+@pytest.mark.parametrize("seed", [0, 1, 1256712675])
+@pytest.mark.parametrize("func", ["choice"])
+def test_legacy_numpy_gen(seed: int, func: str):
+    arr_module = _mk_random(seed, func, legacy=True)
+    arr_generator = _mk_random(seed, func, legacy=False)
+    np.testing.assert_array_equal(arr_module, arr_generator)
+
+
+def _mk_random(seed: int, func: str, *, legacy: bool) -> np.ndarray:
+    np.random.seed(seed)
+    gen = np.random if legacy else _legacy_numpy_gen()
+    match func:
+        case "choice":
+            arr = np.arange(1000)
+            return gen.choice(arr, size=(100, 100))
+        case _:
+            pytest.fail(f"Unknown {func=}")

From c24e9b21a6add88c3b2234aeeaf7e9cbc3feb8fe Mon Sep 17 00:00:00 2001
From: Phil Schaf <flying-sheep@web.de>
Date: Tue, 19 Nov 2024 09:40:22 +0100
Subject: [PATCH 14/28] remove duplicate _LegacyRandom

---
 src/scanpy/_compat.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/scanpy/_compat.py b/src/scanpy/_compat.py
index 6e14f4c2c4..1301d2c1b9 100644
--- a/src/scanpy/_compat.py
+++ b/src/scanpy/_compat.py
@@ -200,9 +200,6 @@ def _numba_threading_layer() -> Layer:
     raise ValueError(msg)
 
 
-_LegacyRandom = int | np.random.RandomState | None
-
-
 def _legacy_numpy_gen(
     random_state: _LegacyRandom | None = None,
 ) -> np.random.RandomState:

From e246f02d51548f3de77cea02921480bf137b26f4 Mon Sep 17 00:00:00 2001
From: Phil Schaf <flying-sheep@web.de>
Date: Tue, 19 Nov 2024 09:43:29 +0100
Subject: [PATCH 15/28] undo compat thing

---
 src/scanpy/preprocessing/_simple.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/scanpy/preprocessing/_simple.py b/src/scanpy/preprocessing/_simple.py
index 9ec518f412..ef0c568d60 100644
--- a/src/scanpy/preprocessing/_simple.py
+++ b/src/scanpy/preprocessing/_simple.py
@@ -927,8 +927,7 @@ def sample(
             raise TypeError(msg)
     del fraction
 
-    if not isinstance(rng, np.random.RandomState):
-        rng = np.random.default_rng(rng)
+    rng = np.random.default_rng(rng)
     indices = rng.choice(old_n, size=n, replace=replace)
     subset = data[indices] if axis_name == "obs" else data[:, indices]
 

From 4ad40b71b61d16fac71936d59a7abbb9569e6a7f Mon Sep 17 00:00:00 2001
From: Phil Schaf <flying-sheep@web.de>
Date: Tue, 19 Nov 2024 10:06:27 +0100
Subject: [PATCH 16/28] fix backwards compat

---
 src/scanpy/preprocessing/_simple.py | 17 ++++++++---------
 tests/test_preprocessing.py         |  4 ++--
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/src/scanpy/preprocessing/_simple.py b/src/scanpy/preprocessing/_simple.py
index ef0c568d60..5ad611d419 100644
--- a/src/scanpy/preprocessing/_simple.py
+++ b/src/scanpy/preprocessing/_simple.py
@@ -8,7 +8,7 @@
 import warnings
 from functools import singledispatch
 from itertools import repeat
-from typing import TYPE_CHECKING, TypeVar, overload
+from typing import TYPE_CHECKING, TypeVar, cast, overload
 
 import numba
 import numpy as np
@@ -927,7 +927,10 @@ def sample(
             raise TypeError(msg)
     del fraction
 
-    rng = np.random.default_rng(rng)
+    # Our backwards compat code passes a `RandomState` here
+    rng: RNGLike | SeedLike | np.random.RandomState | None
+    if not isinstance(rng, np.random.RandomState):
+        rng = np.random.default_rng(rng)
     indices = rng.choice(old_n, size=n, replace=replace)
     subset = data[indices] if axis_name == "obs" else data[:, indices]
 
@@ -987,14 +990,10 @@ def subsample(
     subsamples the passed :class:`~anndata.AnnData` (`copy == False`) or
     returns a subsampled copy of it (`copy == True`).
     """
+    # `sample` can use `RandomState`, but we don’t want to advertise that
+    rng = cast(np.random.Generator, _legacy_numpy_gen(random_state))
     return sample(
-        data=data,
-        fraction=fraction,
-        n=n_obs,
-        rng=_legacy_numpy_gen(random_state),
-        copy=copy,
-        replace=False,
-        axis=0,
+        data=data, fraction=fraction, n=n_obs, rng=rng, copy=copy, replace=False, axis=0
     )
 
 
diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py
index e168f66e14..1df495cf3a 100644
--- a/tests/test_preprocessing.py
+++ b/tests/test_preprocessing.py
@@ -249,8 +249,8 @@ def test_sample_copy_backed(tmp_path):
 
     assert sc.pp.sample(adata_d, n=40, copy=True).shape == (40, 10)
     np.testing.assert_array_equal(
-        sc.pp.sample(adata_m, n=40, copy=True).X,
-        sc.pp.sample(adata_d, n=40, copy=True).X,
+        sc.pp.sample(adata_m, n=40, copy=True, rng=0).X,
+        sc.pp.sample(adata_d, n=40, copy=True, rng=0).X,
     )
 
 

From 1b8c81e7aaeb4d00abf3af58c217706f4cffa6fc Mon Sep 17 00:00:00 2001
From: Phil Schaf <flying-sheep@web.de>
Date: Tue, 19 Nov 2024 17:01:10 +0100
Subject: [PATCH 17/28] Use fake Generator

---
 docs/release-notes/943.feature.md   |  2 +-
 src/scanpy/_compat.py               | 31 ++++++++++++++++++++++++++---
 src/scanpy/preprocessing/_simple.py | 10 +++-------
 3 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/docs/release-notes/943.feature.md b/docs/release-notes/943.feature.md
index f47662ef4e..4f5474d762 100644
--- a/docs/release-notes/943.feature.md
+++ b/docs/release-notes/943.feature.md
@@ -1 +1 @@
-{func}`~scanpy.pp.sample` supports both upsampling and downsampling of observations and variables. {func}`~scanpy.pp.subsample` is now deprecated. {smaller}`G Eraslan`
+{func}`~scanpy.pp.sample` supports both upsampling and downsampling of observations and variables. {func}`~scanpy.pp.subsample` is now deprecated. {smaller}`G Eraslan` & {smaller}`P Angerer`
diff --git a/src/scanpy/_compat.py b/src/scanpy/_compat.py
index 1301d2c1b9..b8ad666906 100644
--- a/src/scanpy/_compat.py
+++ b/src/scanpy/_compat.py
@@ -202,16 +202,41 @@ def _numba_threading_layer() -> Layer:
 
 def _legacy_numpy_gen(
     random_state: _LegacyRandom | None = None,
-) -> np.random.RandomState:
+) -> np.random.Generator:
     """Return a random generator that behaves like the legacy one."""
 
     if random_state is not None:
         if isinstance(random_state, np.random.RandomState):
             np.random.set_state(random_state.get_state(legacy=False))
-            return random_state
+            return _FakeRandomGen(random_state)
         np.random.seed(random_state)
     state = np.random.get_state(legacy=True)
     assert isinstance(state, tuple)
     bit_gen = np.random.MT19937()
     bit_gen.state = state
-    return np.random.RandomState(bit_gen)
+    return _FakeRandomGen(np.random.RandomState(bit_gen))
+
+
+class _FakeRandomGen(np.random.Generator):
+    _state: np.random.RandomState
+
+    def __init__(self, random_state: np.random.RandomState) -> None:
+        self._state = random_state
+
+    @classmethod
+    def _delegate(cls) -> None:
+        for name, meth in np.random.Generator.__dict__.items():
+            if name.startswith("_") or not callable(meth):
+                continue
+
+            def mk_wrapper(name: str):
+                @wraps(meth)
+                def wrapper(self: _FakeRandomGen, *args, **kwargs):
+                    return getattr(self._state, name)(*args, **kwargs)
+
+                return wrapper
+
+            setattr(cls, name, mk_wrapper(name))
+
+
+_FakeRandomGen._delegate()
diff --git a/src/scanpy/preprocessing/_simple.py b/src/scanpy/preprocessing/_simple.py
index 5ad611d419..2b26b162a8 100644
--- a/src/scanpy/preprocessing/_simple.py
+++ b/src/scanpy/preprocessing/_simple.py
@@ -8,7 +8,7 @@
 import warnings
 from functools import singledispatch
 from itertools import repeat
-from typing import TYPE_CHECKING, TypeVar, cast, overload
+from typing import TYPE_CHECKING, TypeVar, overload
 
 import numba
 import numpy as np
@@ -927,10 +927,7 @@ def sample(
             raise TypeError(msg)
     del fraction
 
-    # Our backwards compat code passes a `RandomState` here
-    rng: RNGLike | SeedLike | np.random.RandomState | None
-    if not isinstance(rng, np.random.RandomState):
-        rng = np.random.default_rng(rng)
+    rng = np.random.default_rng(rng)
     indices = rng.choice(old_n, size=n, replace=replace)
     subset = data[indices] if axis_name == "obs" else data[:, indices]
 
@@ -990,8 +987,7 @@ def subsample(
     subsamples the passed :class:`~anndata.AnnData` (`copy == False`) or
     returns a subsampled copy of it (`copy == True`).
     """
-    # `sample` can use `RandomState`, but we don’t want to advertise that
-    rng = cast(np.random.Generator, _legacy_numpy_gen(random_state))
+    rng = _legacy_numpy_gen(random_state)
     return sample(
         data=data, fraction=fraction, n=n_obs, rng=rng, copy=copy, replace=False, axis=0
     )

From 594d961eb895288970df7f5a6478f047f807243a Mon Sep 17 00:00:00 2001
From: Phil Schaf <flying-sheep@web.de>
Date: Tue, 19 Nov 2024 17:08:59 +0100
Subject: [PATCH 18/28] backwards compat test

---
 tests/test_preprocessing.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py
index 1df495cf3a..4bf2d128aa 100644
--- a/tests/test_preprocessing.py
+++ b/tests/test_preprocessing.py
@@ -242,6 +242,15 @@ def test_sample_error(args: dict[str, Any], exc: type[Exception], pattern: str):
         sc.pp.sample(adata, **args)
 
 
+def test_sample_backwards_compat():
+    expected = np.array(
+        [26, 86, 2, 55, 75, 93, 16, 73, 54, 95, 53, 92, 78, 13, 7, 30, 22, 24, 33, 8]
+    )
+    legacy_result, indices = sc.pp.subsample(np.arange(100), n_obs=20)
+    assert np.array_equal(indices, legacy_result), "arange choices should match indices"
+    assert np.array_equal(legacy_result, expected)
+
+
 def test_sample_copy_backed(tmp_path):
     adata_m = AnnData(np.random.rand(200, 10).astype(np.float32))
     adata_d = adata_m.copy()

From 59a171cce5e97e9f81daa20686bd776ac8beb2af Mon Sep 17 00:00:00 2001
From: "Philipp A." <flying-sheep@web.de>
Date: Thu, 21 Nov 2024 13:03:58 +0100
Subject: [PATCH 19/28] Fix tests for old Pythons

---
 src/scanpy/_compat.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/scanpy/_compat.py b/src/scanpy/_compat.py
index b8ad666906..720bebc6e2 100644
--- a/src/scanpy/_compat.py
+++ b/src/scanpy/_compat.py
@@ -4,7 +4,7 @@
 import sys
 import warnings
 from dataclasses import dataclass, field
-from functools import cache, partial, wraps
+from functools import WRAPPER_ASSIGNMENTS, cache, partial, wraps
 from importlib.util import find_spec
 from pathlib import Path
 from typing import TYPE_CHECKING, Literal, ParamSpec, TypeVar, cast, overload
@@ -230,7 +230,8 @@ def _delegate(cls) -> None:
                 continue
 
             def mk_wrapper(name: str):
-                @wraps(meth)
+                # Old pytest versions try to run the doctests
+                @wraps(meth, assigned=set(WRAPPER_ASSIGNMENTS) - {"__doc__"})
                 def wrapper(self: _FakeRandomGen, *args, **kwargs):
                     return getattr(self._state, name)(*args, **kwargs)
 

From 59adc76818b7a005a35ee4b4f2783c7e86eab632 Mon Sep 17 00:00:00 2001
From: "Philipp A." <flying-sheep@web.de>
Date: Thu, 21 Nov 2024 13:31:51 +0100
Subject: [PATCH 20/28] test that random state is modified

---
 src/scanpy/_compat.py | 17 ++++++-----------
 tests/test_utils.py   | 25 +++++++++++++++++++------
 2 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/src/scanpy/_compat.py b/src/scanpy/_compat.py
index 720bebc6e2..9c89f94f27 100644
--- a/src/scanpy/_compat.py
+++ b/src/scanpy/_compat.py
@@ -204,17 +204,12 @@ def _legacy_numpy_gen(
     random_state: _LegacyRandom | None = None,
 ) -> np.random.Generator:
     """Return a random generator that behaves like the legacy one."""
-
-    if random_state is not None:
-        if isinstance(random_state, np.random.RandomState):
-            np.random.set_state(random_state.get_state(legacy=False))
-            return _FakeRandomGen(random_state)
-        np.random.seed(random_state)
-    state = np.random.get_state(legacy=True)
-    assert isinstance(state, tuple)
-    bit_gen = np.random.MT19937()
-    bit_gen.state = state
-    return _FakeRandomGen(np.random.RandomState(bit_gen))
+    if random_state is None:
+        return _FakeRandomGen(np.random.RandomState(np.random.get_bit_generator()))
+    if isinstance(random_state, np.random.RandomState):
+        np.random.set_state(random_state.get_state(legacy=False))
+        return _FakeRandomGen(random_state)
+    np.random.seed(random_state)
 
 
 class _FakeRandomGen(np.random.Generator):
diff --git a/tests/test_utils.py b/tests/test_utils.py
index aebd6b7ec5..aba6456089 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -252,14 +252,27 @@ def test_is_constant_dask(request: pytest.FixtureRequest, axis, expected, block_
 @pytest.mark.parametrize("seed", [0, 1, 1256712675])
 @pytest.mark.parametrize("func", ["choice"])
 def test_legacy_numpy_gen(seed: int, func: str):
-    arr_module = _mk_random(seed, func, legacy=True)
-    arr_generator = _mk_random(seed, func, legacy=False)
-    np.testing.assert_array_equal(arr_module, arr_generator)
+    np.random.seed(seed)
+    state_before = np.random.get_state(legacy=False)
+
+    arrs = {}
+    states_after = {}
+    for direct in [True, False]:
+        np.random.seed(seed)
+        arrs[direct] = _mk_random(func, direct=direct)
+        states_after[direct] = np.random.get_state(legacy=False)
+
+    np.testing.assert_array_equal(arrs[True], arrs[False])
+    np.testing.assert_equal(
+        *states_after.values(), err_msg="both should affect global state the same"
+    )
+    # they should affect the global state
+    with pytest.raises(AssertionError):
+        np.testing.assert_equal(states_after[True], state_before)
 
 
-def _mk_random(seed: int, func: str, *, legacy: bool) -> np.ndarray:
-    np.random.seed(seed)
-    gen = np.random if legacy else _legacy_numpy_gen()
+def _mk_random(func: str, *, direct: bool) -> np.ndarray:
+    gen = np.random if direct else _legacy_numpy_gen()
     match func:
         case "choice":
             arr = np.arange(1000)

From ef27db0de5f948df25fecc39e16058ad949cf70f Mon Sep 17 00:00:00 2001
From: "Philipp A." <flying-sheep@web.de>
Date: Thu, 21 Nov 2024 13:41:00 +0100
Subject: [PATCH 21/28] Fix _legacy_numpy_gen returning None for integer seeds

---
 src/scanpy/_compat.py | 13 +++++++------
 tests/test_utils.py   | 14 +++++++++-----
 2 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/src/scanpy/_compat.py b/src/scanpy/_compat.py
index 9c89f94f27..bf13cd2320 100644
--- a/src/scanpy/_compat.py
+++ b/src/scanpy/_compat.py
@@ -204,12 +204,13 @@ def _legacy_numpy_gen(
     random_state: _LegacyRandom | None = None,
 ) -> np.random.Generator:
     """Return a random generator that behaves like the legacy one."""
-    if random_state is None:
-        return _FakeRandomGen(np.random.RandomState(np.random.get_bit_generator()))
-    if isinstance(random_state, np.random.RandomState):
-        np.random.set_state(random_state.get_state(legacy=False))
-        return _FakeRandomGen(random_state)
-    np.random.seed(random_state)
+
+    if random_state is not None:
+        if isinstance(random_state, np.random.RandomState):
+            np.random.set_state(random_state.get_state(legacy=False))
+            return _FakeRandomGen(random_state)
+        np.random.seed(random_state)
+    return _FakeRandomGen(np.random.RandomState(np.random.get_bit_generator()))
 
 
 class _FakeRandomGen(np.random.Generator):
diff --git a/tests/test_utils.py b/tests/test_utils.py
index aba6456089..2cee7a3e30 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -250,16 +250,18 @@ def test_is_constant_dask(request: pytest.FixtureRequest, axis, expected, block_
 
 
 @pytest.mark.parametrize("seed", [0, 1, 1256712675])
+@pytest.mark.parametrize("pass_seed", [True, False], ids=["pass_seed", "set_seed"])
 @pytest.mark.parametrize("func", ["choice"])
-def test_legacy_numpy_gen(seed: int, func: str):
+def test_legacy_numpy_gen(*, seed: int, pass_seed: bool, func: str):
     np.random.seed(seed)
     state_before = np.random.get_state(legacy=False)
 
     arrs = {}
     states_after = {}
     for direct in [True, False]:
-        np.random.seed(seed)
-        arrs[direct] = _mk_random(func, direct=direct)
+        if not pass_seed:
+            np.random.seed(seed)
+        arrs[direct] = _mk_random(func, direct=direct, seed=seed if pass_seed else None)
         states_after[direct] = np.random.get_state(legacy=False)
 
     np.testing.assert_array_equal(arrs[True], arrs[False])
@@ -271,8 +273,10 @@ def test_legacy_numpy_gen(seed: int, func: str):
         np.testing.assert_equal(states_after[True], state_before)
 
 
-def _mk_random(func: str, *, direct: bool) -> np.ndarray:
-    gen = np.random if direct else _legacy_numpy_gen()
+def _mk_random(func: str, *, direct: bool, seed: int | None) -> np.ndarray:
+    if direct and seed is not None:
+        np.random.seed(seed)
+    gen = np.random if direct else _legacy_numpy_gen(seed)
     match func:
         case "choice":
             arr = np.arange(1000)

From c471e94d4edf39002a1b352afa2597c9f0a8a8d4 Mon Sep 17 00:00:00 2001
From: "Philipp A." <flying-sheep@web.de>
Date: Thu, 21 Nov 2024 13:42:05 +0100
Subject: [PATCH 22/28] types

---
 tests/test_utils.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tests/test_utils.py b/tests/test_utils.py
index 2cee7a3e30..81369a6938 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -2,6 +2,7 @@
 
 from operator import mul, truediv
 from types import ModuleType
+from typing import TYPE_CHECKING
 
 import numpy as np
 import pytest
@@ -26,6 +27,9 @@
     ARRAY_TYPES_SPARSE_DASK_UNSUPPORTED,
 )
 
+if TYPE_CHECKING:
+    from typing import Any
+
 
 def test_descend_classes_and_funcs():
     # create module hierarchy
@@ -256,8 +260,8 @@ def test_legacy_numpy_gen(*, seed: int, pass_seed: bool, func: str):
     np.random.seed(seed)
     state_before = np.random.get_state(legacy=False)
 
-    arrs = {}
-    states_after = {}
+    arrs: dict[bool, np.ndarray] = {}
+    states_after: dict[bool, dict[str, Any]] = {}
     for direct in [True, False]:
         if not pass_seed:
             np.random.seed(seed)

From 3028dff59435f94a1fa90eae7a2f8bcb746ddbc9 Mon Sep 17 00:00:00 2001
From: "Philipp A." <flying-sheep@web.de>
Date: Thu, 21 Nov 2024 13:49:48 +0100
Subject: [PATCH 23/28] move deprecated stuff

---
 src/scanpy/preprocessing/__init__.py          |  2 +-
 .../preprocessing/_deprecated/sampling.py     | 60 +++++++++++++++++++
 src/scanpy/preprocessing/_simple.py           | 48 +--------------
 3 files changed, 62 insertions(+), 48 deletions(-)
 create mode 100644 src/scanpy/preprocessing/_deprecated/sampling.py

diff --git a/src/scanpy/preprocessing/__init__.py b/src/scanpy/preprocessing/__init__.py
index c407850282..4307cbb6c9 100644
--- a/src/scanpy/preprocessing/__init__.py
+++ b/src/scanpy/preprocessing/__init__.py
@@ -3,6 +3,7 @@
 from ..neighbors import neighbors
 from ._combat import combat
 from ._deprecated.highly_variable_genes import filter_genes_dispersion
+from ._deprecated.sampling import subsample
 from ._highly_variable_genes import highly_variable_genes
 from ._normalization import normalize_total
 from ._pca import pca
@@ -19,7 +20,6 @@
     regress_out,
     sample,
     sqrt,
-    subsample,
 )
 
 __all__ = [
diff --git a/src/scanpy/preprocessing/_deprecated/sampling.py b/src/scanpy/preprocessing/_deprecated/sampling.py
new file mode 100644
index 0000000000..02619a2364
--- /dev/null
+++ b/src/scanpy/preprocessing/_deprecated/sampling.py
@@ -0,0 +1,60 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from ..._compat import _legacy_numpy_gen, old_positionals
+from .._simple import sample
+
+if TYPE_CHECKING:
+    import numpy as np
+    from anndata import AnnData
+    from numpy.typing import NDArray
+    from scipy.sparse import csc_matrix, csr_matrix
+
+    from ..._compat import _LegacyRandom
+
+    CSMatrix = csr_matrix | csc_matrix
+
+
+@old_positionals("n_obs", "random_state", "copy")
+def subsample(
+    data: AnnData | np.ndarray | CSMatrix,
+    fraction: float | None = None,
+    *,
+    n_obs: int | None = None,
+    random_state: _LegacyRandom = 0,
+    copy: bool = False,
+) -> AnnData | tuple[np.ndarray | CSMatrix, NDArray[np.int64]] | None:
+    """\
+    Subsample to a fraction of the number of observations.
+
+    .. deprecated:: 1.11.0
+
+       Use :func:`~scanpy.pp.sample` instead.
+
+    Parameters
+    ----------
+    data
+        The (annotated) data matrix of shape `n_obs` × `n_vars`.
+        Rows correspond to cells and columns to genes.
+    fraction
+        Subsample to this `fraction` of the number of observations.
+    n_obs
+        Subsample to this number of observations.
+    random_state
+        Random seed to change subsampling.
+    copy
+        If an :class:`~anndata.AnnData` is passed,
+        determines whether a copy is returned.
+
+    Returns
+    -------
+    Returns `X[obs_indices], obs_indices` if data is array-like, otherwise
+    subsamples the passed :class:`~anndata.AnnData` (`copy == False`) or
+    returns a subsampled copy of it (`copy == True`).
+    """
+    # Thin wrapper: turn `random_state` into an rng and delegate to `sample`.
+    rng = _legacy_numpy_gen(random_state)
+    return sample(
+        data=data, fraction=fraction, n=n_obs, rng=rng, copy=copy, replace=False, axis=0
+    )
diff --git a/src/scanpy/preprocessing/_simple.py b/src/scanpy/preprocessing/_simple.py
index 2b26b162a8..b9d548c771 100644
--- a/src/scanpy/preprocessing/_simple.py
+++ b/src/scanpy/preprocessing/_simple.py
@@ -18,7 +18,7 @@
 from sklearn.utils import check_array, sparsefuncs
 
 from .. import logging as logg
-from .._compat import _legacy_numpy_gen, njit, old_positionals
+from .._compat import njit, old_positionals
 from .._settings import settings as sett
 from .._utils import (
     _check_array_function_arguments,
@@ -39,9 +39,6 @@
 except ImportError:
     da = None
 
-# backwards compat
-from ._deprecated.highly_variable_genes import filter_genes_dispersion  # noqa: F401
-
 if TYPE_CHECKING:
     from collections.abc import Collection, Iterable, Sequence
     from numbers import Number
@@ -950,49 +947,6 @@ def sample(
         data._inplace_subset_var(indices)
 
 
-@old_positionals("n_obs", "random_state", "copy")
-def subsample(
-    data: AnnData | np.ndarray | CSMatrix,
-    fraction: float | None = None,
-    *,
-    n_obs: int | None = None,
-    random_state: _LegacyRandom = 0,
-    copy: bool = False,
-) -> AnnData | tuple[np.ndarray | CSMatrix, NDArray[np.int64]] | None:
-    """\
-    Subsample to a fraction of the number of observations.
-
-    .. deprecated:: 1.11.0
-
-       Use :func:`~scanpy.pp.sample` instead.
-
-    Parameters
-    ----------
-    data
-        The (annotated) data matrix of shape `n_obs` × `n_vars`.
-        Rows correspond to cells and columns to genes.
-    fraction
-        Subsample to this `fraction` of the number of observations.
-    n_obs
-        Subsample to this number of observations.
-    random_state
-        Random seed to change subsampling.
-    copy
-        If an :class:`~anndata.AnnData` is passed,
-        determines whether a copy is returned.
-
-    Returns
-    -------
-    Returns `X[obs_indices], obs_indices` if data is array-like, otherwise
-    subsamples the passed :class:`~anndata.AnnData` (`copy == False`) or
-    returns a subsampled copy of it (`copy == True`).
-    """
-    rng = _legacy_numpy_gen(random_state)
-    return sample(
-        data=data, fraction=fraction, n=n_obs, rng=rng, copy=copy, replace=False, axis=0
-    )
-
-
 @renamed_arg("target_counts", "counts_per_cell")
 def downsample_counts(
     adata: AnnData,

From f11b6ba93edbdd763acd824cb176cb64a9a63e39 Mon Sep 17 00:00:00 2001
From: "Philipp A." <flying-sheep@web.de>
Date: Thu, 21 Nov 2024 14:04:06 +0100
Subject: [PATCH 24/28] Use deprecation decorator

---
 pyproject.toml                                |  1 +
 src/scanpy/_compat.py                         | 13 ++++++++++
 src/scanpy/plotting/_preprocessing.py         |  3 ++-
 .../_deprecated/highly_variable_genes.py      | 24 +++++++++----------
 src/scanpy/preprocessing/_simple.py           | 21 ++++++++--------
 5 files changed, 39 insertions(+), 23 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index cfb7ffd28a..324c4c4262 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -66,6 +66,7 @@ dependencies = [
     "packaging>=21.3",
     "session-info",
     "legacy-api-wrap>=1.4",  # for positional API deprecations
+    "typing-extensions; python_version < '3.13'",
 ]
 dynamic = ["version"]
 
diff --git a/src/scanpy/_compat.py b/src/scanpy/_compat.py
index dca6c84c4e..b97b1a8603 100644
--- a/src/scanpy/_compat.py
+++ b/src/scanpy/_compat.py
@@ -48,6 +48,10 @@ class ZappyArray:
     "fullname",
     "pkg_metadata",
     "pkg_version",
+    "old_positionals",
+    "deprecated",
+    "njit",
+    "_numba_threading_layer",
 ]
 
 
@@ -102,6 +106,15 @@ def old_positionals(*old_positionals: str):
         return lambda func: func
 
 
+if sys.version_info >= (3, 13):
+    from warnings import deprecated as _deprecated
+else:
+    from typing_extensions import deprecated as _deprecated
+
+
+deprecated = partial(_deprecated, category=FutureWarning)
+
+
 @overload
 def njit(fn: Callable[P, R], /) -> Callable[P, R]: ...
 @overload
diff --git a/src/scanpy/plotting/_preprocessing.py b/src/scanpy/plotting/_preprocessing.py
index e6c7808be1..b51688082e 100644
--- a/src/scanpy/plotting/_preprocessing.py
+++ b/src/scanpy/plotting/_preprocessing.py
@@ -6,7 +6,7 @@
 from matplotlib import pyplot as plt
 from matplotlib import rcParams
 
-from .._compat import old_positionals
+from .._compat import deprecated, old_positionals
 from .._settings import settings
 from . import _utils
 
@@ -103,6 +103,7 @@ def highly_variable_genes(
 
 
 # backwards compat
+@deprecated("Use sc.pl.highly_variable_genes instead")
 @old_positionals("log", "show", "save")
 def filter_genes_dispersion(
     result: np.recarray,
diff --git a/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py b/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py
index f2c3ce971b..27e8f1f846 100644
--- a/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py
+++ b/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py
@@ -9,7 +9,7 @@
 from scipy.sparse import issparse
 
 from ... import logging as logg
-from ..._compat import old_positionals
+from ..._compat import deprecated, old_positionals
 from .._distributed import materialize_as_ndarray
 from .._utils import _get_mean_var
 
@@ -19,6 +19,7 @@
     from scipy.sparse import spmatrix
 
 
+@deprecated("Use sc.pp.highly_variable_genes instead")
 @old_positionals(
     "flavor",
     "min_disp",
@@ -48,18 +49,17 @@ def filter_genes_dispersion(
     """\
     Extract highly variable genes :cite:p:`Satija2015,Zheng2017`.
 
-    .. warning::
-        .. deprecated:: 1.3.6
-            Use :func:`~scanpy.pp.highly_variable_genes`
-            instead. The new function is equivalent to the present
-            function, except that
+    .. deprecated:: 1.3.6
 
-            * the new function always expects logarithmized data
-            * `subset=False` in the new function, it suffices to
-              merely annotate the genes, tools like `pp.pca` will
-              detect the annotation
-            * you can now call: `sc.pl.highly_variable_genes(adata)`
-            * `copy` is replaced by `inplace`
+       Use :func:`~scanpy.pp.highly_variable_genes` instead.
+       The new function is equivalent to the present function, except that
+
+       * the new function always expects logarithmized data
+       * `subset=False` in the new function, it suffices to
+         merely annotate the genes, tools like `pp.pca` will
+         detect the annotation
+       * you can now call: `sc.pl.highly_variable_genes(adata)`
+       * `copy` is replaced by `inplace`
 
     If trying out parameters, pass the data matrix instead of AnnData.
 
diff --git a/src/scanpy/preprocessing/_simple.py b/src/scanpy/preprocessing/_simple.py
index 01936414a5..eaf9648690 100644
--- a/src/scanpy/preprocessing/_simple.py
+++ b/src/scanpy/preprocessing/_simple.py
@@ -18,7 +18,7 @@
 from sklearn.utils import check_array, sparsefuncs
 
 from .. import logging as logg
-from .._compat import njit, old_positionals
+from .._compat import deprecated, njit, old_positionals
 from .._settings import settings as sett
 from .._utils import (
     _check_array_function_arguments,
@@ -474,6 +474,7 @@ def sqrt(
         return X.sqrt()
 
 
+@deprecated("Use sc.pp.normalize_total instead")
 @old_positionals(
     "counts_per_cell_after",
     "counts_per_cell",
@@ -497,16 +498,16 @@ def normalize_per_cell(
     """\
     Normalize total counts per cell.
 
-    .. warning::
-        .. deprecated:: 1.3.7
-            Use :func:`~scanpy.pp.normalize_total` instead.
-            The new function is equivalent to the present
-            function, except that
+    .. deprecated:: 1.3.7
 
-            * the new function doesn't filter cells based on `min_counts`,
-              use :func:`~scanpy.pp.filter_cells` if filtering is needed.
-            * some arguments were renamed
-            * `copy` is replaced by `inplace`
+       Use :func:`~scanpy.pp.normalize_total` instead.
+       The new function is equivalent to the present
+       function, except that
+
+       * the new function doesn't filter cells based on `min_counts`,
+         use :func:`~scanpy.pp.filter_cells` if filtering is needed.
+       * some arguments were renamed
+       * `copy` is replaced by `inplace`
 
     Normalize each cell by total counts over all genes, so that every cell has
     the same total count after normalization.

From 735f00aaa8622fcc3c3c3fff872088d9f060b13c Mon Sep 17 00:00:00 2001
From: "Philipp A." <flying-sheep@web.de>
Date: Thu, 21 Nov 2024 14:15:09 +0100
Subject: [PATCH 25/28] relnote

---
 docs/release-notes/3380.bugfix.md | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 docs/release-notes/3380.bugfix.md

diff --git a/docs/release-notes/3380.bugfix.md b/docs/release-notes/3380.bugfix.md
new file mode 100644
index 0000000000..633ce346af
--- /dev/null
+++ b/docs/release-notes/3380.bugfix.md
@@ -0,0 +1 @@
+Raise {exc}`FutureWarning` when calling deprecated {mod}`scanpy.pp` functions {smaller}`P Angerer`

From 0a5b284bbe75a21d76ad41a793de041eac08edf1 Mon Sep 17 00:00:00 2001
From: "Philipp A." <flying-sheep@web.de>
Date: Thu, 21 Nov 2024 15:24:57 +0100
Subject: [PATCH 26/28] fix dask warning stuff

---
 tests/test_preprocessing.py | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py
index 4bf2d128aa..835e8ec394 100644
--- a/tests/test_preprocessing.py
+++ b/tests/test_preprocessing.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import warnings
+from importlib.util import find_spec
 from itertools import product
 from pathlib import Path
 from typing import TYPE_CHECKING
@@ -22,7 +23,7 @@
     maybe_dask_process_context,
 )
 from testing.scanpy._helpers.data import pbmc3k, pbmc68k_reduced
-from testing.scanpy._pytest.params import ARRAY_TYPES, param_with
+from testing.scanpy._pytest.params import ARRAY_TYPES
 
 if TYPE_CHECKING:
     from collections.abc import Callable
@@ -143,16 +144,7 @@ def test_normalize_per_cell():
     assert adata.X.sum(axis=1).tolist() == adata_sparse.X.sum(axis=1).A1.tolist()
 
 
-ignore_dask_perf = pytest.mark.filterwarnings("ignore::dask.array.PerformanceWarning")
-
-
-@pytest.mark.parametrize(
-    "array_type",
-    [
-        param_with(p, marks=[ignore_dask_perf] if "dask" in (p.id or "") else [])
-        for p in ARRAY_TYPES
-    ],
-)
+@pytest.mark.parametrize("array_type", ARRAY_TYPES)
 @pytest.mark.parametrize("which", ["copy", "inplace", "array"])
 @pytest.mark.parametrize(
     ("axis", "fraction", "n", "replace", "expected"),
@@ -177,6 +169,11 @@ def test_sample(
 ):
     adata = AnnData(array_type(np.ones((200, 10))))
 
+    # ignoring this warning declaratively is a pain so do it here
+    if find_spec("dask"):
+        import dask.array as da
+
+        warnings.filterwarnings(category=da.PerformanceWarning)
     # can’t guarantee that duplicates are drawn when `replace=True`,
     # so we just ignore the warning instead using `with pytest.warns(...)`
     warnings.filterwarnings(

From 0ca941111997a7592267584ba92699fb96c677ab Mon Sep 17 00:00:00 2001
From: "Philipp A." <flying-sheep@web.de>
Date: Thu, 21 Nov 2024 15:56:19 +0100
Subject: [PATCH 27/28] oops

---
 tests/test_preprocessing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py
index 835e8ec394..15318f2c52 100644
--- a/tests/test_preprocessing.py
+++ b/tests/test_preprocessing.py
@@ -173,7 +173,7 @@ def test_sample(
     if find_spec("dask"):
         import dask.array as da
 
-        warnings.filterwarnings(category=da.PerformanceWarning)
+        warnings.filterwarnings("ignore", category=da.PerformanceWarning)
     # can’t guarantee that duplicates are drawn when `replace=True`,
     # so we just ignore the warning instead using `with pytest.warns(...)`
     warnings.filterwarnings(

From 396b21a217711249e1803a01357a14da54bc6106 Mon Sep 17 00:00:00 2001
From: "Philipp A." <flying-sheep@web.de>
Date: Fri, 22 Nov 2024 16:04:13 +0100
Subject: [PATCH 28/28] Bump numpy to version that has get_bit_generator

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 324c4c4262..b882604268 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -47,7 +47,7 @@ classifiers = [
 ]
 dependencies = [
     "anndata>=0.8",
-    "numpy>=1.23",
+    "numpy>=1.24",
     "matplotlib>=3.6",
     "pandas >=1.5",
     "scipy>=1.8",