Add denoise tests
asistradition committed Feb 29, 2024
1 parent 2e6dfb7 commit 0b36e10
Showing 7 changed files with 299 additions and 53 deletions.
100 changes: 92 additions & 8 deletions inferelator_velocity/denoise.py
@@ -1,5 +1,7 @@
import numpy as np
import scipy.sparse as sps

from inferelator_velocity.utils.math import array_sum
from inferelator_velocity.utils.noise2self import (
_dist_to_row_stochastic,
dot
@@ -15,7 +17,10 @@ def denoise(
layer='X',
graph_key=NOISE2SELF_DIST_KEY,
output_layer=NOISE2SELF_DENOISED_KEY,
dense=True
dense=True,
chunk_size=10000,
zero_threshold=None,
obs_count_key=None
):

lref = data.X if layer == 'X' else data.layers[layer]
@@ -26,13 +31,92 @@
f"run global_graph() first"
)

data.layers[output_layer] = np.zeros(lref.shape, dtype=np.float32)
if data.obsp[graph_key].dtype != lref.dtype:
raise RuntimeError(
f"Graph dtype {data.obsp[graph_key].dtype} is not the "
f"same as data dtype {lref.dtype}; "
"these must match and be float32 or float64"
)

dot(
_dist_to_row_stochastic(data.obsp[graph_key]),
lref,
dense=dense,
out=data.layers[output_layer]
)
_n_obs = lref.shape[0]

if chunk_size is not None:
_n_chunks = int(np.ceil(_n_obs / chunk_size))
else:
_n_chunks = 1

if _n_chunks == 1:
data.layers[output_layer] = _denoise_chunk(
lref,
_dist_to_row_stochastic(data.obsp[graph_key]),
dense=dense,
zero_threshold=zero_threshold
)

elif dense or not sps.issparse(lref):
data.layers[output_layer] = np.zeros(lref.shape, dtype=np.float32)

for i in range(_n_chunks):
_start, _stop = i * chunk_size, min((i + 1) * chunk_size, _n_obs)

_denoise_chunk(
lref,
_dist_to_row_stochastic(
data.obsp[graph_key][_start:_stop, :]
),
dense=True,
out=data.layers[output_layer][_start:_stop, :],
zero_threshold=zero_threshold
)

else:
data.layers[output_layer] = sps.vstack(
tuple(
_denoise_chunk(
lref,
_dist_to_row_stochastic(
data.obsp[graph_key][
i * chunk_size:min((i + 1) * chunk_size, _n_obs),
:
]
),
zero_threshold=zero_threshold
)
for i in range(_n_chunks)
)
)

if obs_count_key is not None:
data.obs[obs_count_key] = array_sum(
data.layers[output_layer],
axis=1
)

return data


def _denoise_chunk(
x,
graph,
zero_threshold=None,
out=None,
dense=False
):

if not sps.issparse(x):
dense = True

out = dot(
graph,
x,
out=out,
dense=dense
)

if zero_threshold is not None and dense:
out[out < zero_threshold] = 0
elif zero_threshold:
out.data[out.data < zero_threshold] = 0
out.eliminate_zeros()

return out
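
For context, a minimal usage sketch of the new chunked interface (not part of the commit): the AnnData object, the 'graph', 'denoised', and 'denoised_counts' key names, and the randomly generated kNN graph below are illustrative stand-ins; a real workflow would build the graph with the package's own graph step before calling denoise.

import anndata as ad
import numpy as np
import scipy.sparse as sps

from inferelator_velocity.denoise import denoise

N_OBS, N_GENES, K = 500, 20, 10
rng = np.random.default_rng(0)

# illustrative float32 expression matrix
X = rng.poisson(2.0, (N_OBS, N_GENES)).astype(np.float32)

# placeholder kNN distance graph with exactly K neighbours per row,
# built the same way as the EXPR_KNN test stub
graph = sps.csr_matrix(
    (
        rng.uniform(0.1, 2.0, size=N_OBS * K),
        (
            np.repeat(np.arange(N_OBS), K),
            np.concatenate(
                tuple(
                    rng.choice(N_OBS, size=(K, ), replace=False)
                    for _ in range(N_OBS)
                )
            )
        )
    ),
    shape=(N_OBS, N_OBS),
    dtype=np.float32
)

adata = ad.AnnData(X)
adata.obsp['graph'] = graph

denoise(
    adata,
    graph_key='graph',               # where the kNN graph lives in .obsp
    output_layer='denoised',         # result written to .layers['denoised']
    chunk_size=200,                  # process 200 observations at a time
    zero_threshold=0.5,              # zero out denoised values below 0.5
    obs_count_key='denoised_counts'  # per-observation sums stored in .obs
)

Chunking the row-stochastic multiply bounds peak memory: only chunk_size rows of the output are materialised per pass, and the sparse-to-sparse path stacks the per-chunk results with sps.vstack instead of allocating a dense matrix.
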
63 changes: 63 additions & 0 deletions inferelator_velocity/tests/_stubs.py
@@ -0,0 +1,63 @@
import numpy as np
import anndata as ad
import scipy.sparse as sps

KNN_N = 10

DIST = np.tile(np.arange(KNN_N), (KNN_N, 1)).astype(float)
CONN = 1 / (DIST + 1)

KNN = np.diag(np.ones(KNN_N - 1), -1) + np.diag(np.ones(KNN_N - 1), 1)
KNN[0, KNN_N - 1] = 1
KNN[KNN_N - 1, 0] = 1

rng = np.random.default_rng(100)
COUNTS = rng.negative_binomial(5, 0.5, (1000, 10))

N = 1000
BINS = 10

EXPRESSION = np.zeros((N, 6), dtype=int)
EXPRESSION[:, 0] = (100 * np.random.default_rng(222222).random(N)).astype(int)
EXPRESSION[:, 1] = EXPRESSION[:, 0] * 1.75 - 0.5
EXPRESSION[:, 2] = EXPRESSION[:, 0] ** 2
EXPRESSION[:, 3] = 0
EXPRESSION[:, 4] = np.arange(N)
EXPRESSION[:, 5] = np.arange(N) * 2 + 10

K = 15
EXPR_KNN = sps.csr_matrix(
(
rng.uniform(low=0.1, high=2., size=K * N),
(
np.repeat(np.arange(N), K),
np.concatenate(
tuple(
rng.choice(np.arange(1000), size=(K, ), replace=False)
for _ in range(N)
)
)
)
),
dtype=np.float32
)

EXPRESSION_ADATA = ad.AnnData(EXPRESSION.astype(int))

ADATA_UNS_PROGRAM_KEYS = [
'metric',
'leiden_correlation',
'metric_genes',
'information_distance',
'cluster_program_map',
'n_comps',
'n_programs',
'program_names',
'molecular_cv_loss'
]

PROGRAMS = ['0', '0', '0', '-1', '1', '1']
PROGRAMS_ASSIGNED = ['0', '0', '0', '0', '1', '1']

TIMES_0 = EXPRESSION[:, 0] / 100
TIMES_1 = np.arange(N).astype(float)
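
As a quick hedged sanity check (not part of the commit, and assuming the tests directory is importable as a package), the sparse stub above gives every one of the N observations exactly K stored neighbour weights, all drawn from U(0.1, 2):

import numpy as np

from inferelator_velocity.tests._stubs import EXPR_KNN, K, N

assert EXPR_KNN.shape == (N, N)

# CSR indptr differences give the number of stored entries per row
nnz_per_row = np.diff(EXPR_KNN.indptr)
assert (nnz_per_row == K).all()

# weights are strictly positive, so no row of the graph sums to zero
assert EXPR_KNN.data.min() > 0
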
119 changes: 119 additions & 0 deletions inferelator_velocity/tests/test_denoise.py
@@ -0,0 +1,119 @@
import unittest
import numpy as np
import numpy.testing as npt
import scipy.sparse as sps

from ._stubs import (
EXPR_KNN,
EXPRESSION_ADATA
)

from inferelator_velocity.denoise import (
_dist_to_row_stochastic,
denoise
)

DENOISE_EXPR = np.dot(
_dist_to_row_stochastic(EXPR_KNN).A,
EXPRESSION_ADATA.X
)


class TestDenoise(unittest.TestCase):

threshold = None

@classmethod
def setUpClass(cls) -> None:
cls.expect = DENOISE_EXPR.copy()

if cls.threshold is not None:
cls.expect[cls.expect < cls.threshold] = 0

return super().setUpClass()

def setUp(self) -> None:
self.data = EXPRESSION_ADATA.copy()
self.data.X = self.data.X.astype(np.float32)
self.data.obsp['graph'] = EXPR_KNN

return super().setUp()

def test_errors(self):

with self.assertRaises(RuntimeError):
denoise(
self.data,
chunk_size=500,
output_layer='abc',
zero_threshold=self.threshold
)

self.data.X = self.data.X.astype(int)
with self.assertRaises(RuntimeError):
denoise(
self.data,
chunk_size=500,
graph_key='graph',
output_layer='abc',
zero_threshold=self.threshold
)

def test_denoise_dense_to_dense(self):

denoise(
self.data,
chunk_size=500,
graph_key='graph',
output_layer='abc',
zero_threshold=self.threshold
)

npt.assert_almost_equal(
self.expect,
self.data.layers['abc'],
decimal=2
)

def test_denoise_sparse_to_dense(self):

self.data.X = sps.csr_array(self.data.X)

denoise(
self.data,
chunk_size=500,
graph_key='graph',
output_layer='abc',
dense=True,
zero_threshold=self.threshold
)

npt.assert_almost_equal(
self.expect,
self.data.layers['abc'],
decimal=2
)

def test_denoise_sparse_to_sparse(self):

self.data.X = sps.csr_array(self.data.X)

denoise(
self.data,
chunk_size=500,
graph_key='graph',
output_layer='abc',
dense=False,
zero_threshold=self.threshold
)

npt.assert_almost_equal(
self.expect,
self.data.layers['abc'].A,
decimal=2
)


class TestDenoiseThreshold(TestDenoise):

threshold = 100
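
In other words (a sketch mirroring the module-level expectation above rather than adding anything new): the tests expect the denoised layer to equal the row-stochastic graph applied to the expression matrix, and TestDenoiseThreshold expects the same product with entries below 100 zeroed out.

import numpy as np

from inferelator_velocity.denoise import _dist_to_row_stochastic
from inferelator_velocity.tests._stubs import EXPR_KNN, EXPRESSION_ADATA

# weighted average of each observation's neighbours, with weights taken
# from the row-normalised kNN graph
graph_row_stochastic = _dist_to_row_stochastic(EXPR_KNN).A
expected = graph_row_stochastic @ EXPRESSION_ADATA.X

# TestDenoiseThreshold compares against the same product with small
# values clipped to zero
expected_thresholded = expected.copy()
expected_thresholded[expected_thresholded < 100] = 0
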
21 changes: 11 additions & 10 deletions inferelator_velocity/tests/test_graph.py
@@ -4,16 +4,17 @@
import numpy as np
import numpy.testing as npt

from inferelator_velocity.utils.graph import (get_shortest_paths, get_total_path, local_optimal_knn)

N = 10

DIST = np.tile(np.arange(N), (N, 1)).astype(float)
CONN = 1 / (DIST + 1)

KNN = np.diag(np.ones(N - 1), -1) + np.diag(np.ones(N - 1), 1)
KNN[0, N - 1] = 1
KNN[N - 1, 0] = 1
from inferelator_velocity.utils.graph import (
get_shortest_paths,
get_total_path,
local_optimal_knn
)
from ._stubs import (
KNN_N as N,
DIST,
CONN,
KNN
)

COL = {
'a': ('b', 0, 0.5),
6 changes: 3 additions & 3 deletions inferelator_velocity/tests/test_mcv.py
@@ -4,9 +4,9 @@

from inferelator_velocity.utils.mcv import mcv_pcs


rng = np.random.default_rng(100)
COUNTS = rng.negative_binomial(5, 0.5, (1000, 10))
from ._stubs import (
COUNTS
)


class TestMCV(unittest.TestCase):
Empty file.
