diff --git a/docs/api-reference.md b/docs/api-reference.md index c81ef90..a459743 100644 --- a/docs/api-reference.md +++ b/docs/api-reference.md @@ -7,6 +7,7 @@ :toctree: generated atleast_nd + cov expand_dims kron ``` diff --git a/src/array_api_extra/__init__.py b/src/array_api_extra/__init__.py index b26d27c..7a46760 100644 --- a/src/array_api_extra/__init__.py +++ b/src/array_api_extra/__init__.py @@ -1,7 +1,7 @@ from __future__ import annotations -from ._funcs import atleast_nd, expand_dims, kron +from ._funcs import atleast_nd, cov, expand_dims, kron __version__ = "0.1.2.dev0" -__all__ = ["__version__", "atleast_nd", "expand_dims", "kron"] +__all__ = ["__version__", "atleast_nd", "cov", "expand_dims", "kron"] diff --git a/src/array_api_extra/_funcs.py b/src/array_api_extra/_funcs.py index 234617f..a371768 100644 --- a/src/array_api_extra/_funcs.py +++ b/src/array_api_extra/_funcs.py @@ -1,11 +1,12 @@ from __future__ import annotations +import warnings from typing import TYPE_CHECKING if TYPE_CHECKING: from ._typing import Array, ModuleType -__all__ = ["atleast_nd", "expand_dims", "kron"] +__all__ = ["atleast_nd", "cov", "expand_dims", "kron"] def atleast_nd(x: Array, /, *, ndim: int, xp: ModuleType) -> Array: @@ -48,6 +49,117 @@ def atleast_nd(x: Array, /, *, ndim: int, xp: ModuleType) -> Array: return x +def cov(m: Array, /, *, xp: ModuleType) -> Array: + """ + Estimate a covariance matrix. + + Covariance indicates the level to which two variables vary together. + If we examine N-dimensional samples, :math:`X = [x_1, x_2, ... x_N]^T`, + then the covariance matrix element :math:`C_{ij}` is the covariance of + :math:`x_i` and :math:`x_j`. The element :math:`C_{ii}` is the variance + of :math:`x_i`. + + This provides a subset of the functionality of ``numpy.cov``. + + Parameters + ---------- + m : array + A 1-D or 2-D array containing multiple variables and observations. + Each row of `m` represents a variable, and each column a single + observation of all those variables. + xp : array_namespace + The standard-compatible namespace for `m`. + + Returns + ------- + res : array + The covariance matrix of the variables. + + Examples + -------- + >>> import array_api_strict as xp + >>> import array_api_extra as xpx + + Consider two variables, :math:`x_0` and :math:`x_1`, which + correlate perfectly, but in opposite directions: + + >>> x = xp.asarray([[0, 2], [1, 1], [2, 0]]).T + >>> x + Array([[0, 1, 2], + [2, 1, 0]], dtype=array_api_strict.int64) + + Note how :math:`x_0` increases while :math:`x_1` decreases. The covariance + matrix shows this clearly: + + >>> xpx.cov(x, xp=xp) + Array([[ 1., -1.], + [-1., 1.]], dtype=array_api_strict.float64) + + + Note that element :math:`C_{0,1}`, which shows the correlation between + :math:`x_0` and :math:`x_1`, is negative. + + Further, note how `x` and `y` are combined: + + >>> x = xp.asarray([-2.1, -1, 4.3]) + >>> y = xp.asarray([3, 1.1, 0.12]) + >>> X = xp.stack((x, y), axis=0) + >>> xpx.cov(X, xp=xp) + Array([[11.71 , -4.286 ], + [-4.286 , 2.14413333]], dtype=array_api_strict.float64) + + >>> xpx.cov(x, xp=xp) + Array(11.71, dtype=array_api_strict.float64) + + >>> xpx.cov(y, xp=xp) + Array(2.14413333, dtype=array_api_strict.float64) + + """ + m = xp.asarray(m, copy=True) + dtype = ( + xp.float64 if xp.isdtype(m.dtype, "integral") else xp.result_type(m, xp.float64) + ) + + m = atleast_nd(m, ndim=2, xp=xp) + m = xp.astype(m, dtype) + + avg = _mean(m, axis=1, xp=xp) + fact = m.shape[1] - 1 + + if fact <= 0: + warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning, stacklevel=2) + fact = 0.0 + + m -= avg[:, None] + m_transpose = m.T + if xp.isdtype(m_transpose.dtype, "complex floating"): + m_transpose = xp.conj(m_transpose) + c = m @ m_transpose + c /= fact + axes = tuple(axis for axis, length in enumerate(c.shape) if length == 1) + return xp.squeeze(c, axis=axes) + + +def _mean( + x: Array, + /, + *, + axis: int | tuple[int, ...] | None = None, + keepdims: bool = False, + xp: ModuleType, +) -> Array: + """ + Complex mean, https://github.com/data-apis/array-api/issues/846. + """ + if xp.isdtype(x.dtype, "complex floating"): + x_real = xp.real(x) + x_imag = xp.imag(x) + mean_real = xp.mean(x_real, axis=axis, keepdims=keepdims) + mean_imag = xp.mean(x_imag, axis=axis, keepdims=keepdims) + return mean_real + (mean_imag * xp.asarray(1j)) + return xp.mean(x, axis=axis, keepdims=keepdims) + + def expand_dims( a: Array, /, *, axis: int | tuple[int, ...] = (0,), xp: ModuleType ) -> Array: diff --git a/tests/test_funcs.py b/tests/test_funcs.py index 6ed3278..556add1 100644 --- a/tests/test_funcs.py +++ b/tests/test_funcs.py @@ -1,14 +1,15 @@ from __future__ import annotations import contextlib +import warnings from typing import TYPE_CHECKING, Any # array-api-strict#6 import array_api_strict as xp # type: ignore[import-untyped] import pytest -from numpy.testing import assert_array_equal, assert_equal +from numpy.testing import assert_allclose, assert_array_equal, assert_equal -from array_api_extra import atleast_nd, expand_dims, kron +from array_api_extra import atleast_nd, cov, expand_dims, kron if TYPE_CHECKING: Array = Any # To be changed to a Protocol later (see array-api#589) @@ -76,6 +77,41 @@ def test_5D(self): assert_array_equal(y, xp.ones((1, 1, 1, 1, 1, 1, 1, 1, 1))) +class TestCov: + def test_basic(self): + assert_allclose( + cov(xp.asarray([[0, 2], [1, 1], [2, 0]]).T, xp=xp), + xp.asarray([[1.0, -1.0], [-1.0, 1.0]]), + ) + + def test_complex(self): + x = xp.asarray([[1, 2, 3], [1j, 2j, 3j]]) + res = xp.asarray([[1.0, -1.0j], [1.0j, 1.0]]) + assert_allclose(cov(x, xp=xp), res) + + def test_empty(self): + with warnings.catch_warnings(record=True): + warnings.simplefilter("always", RuntimeWarning) + assert_array_equal(cov(xp.asarray([]), xp=xp), xp.nan) + assert_array_equal( + cov(xp.reshape(xp.asarray([]), (0, 2)), xp=xp), + xp.reshape(xp.asarray([]), (0, 0)), + ) + assert_array_equal( + cov(xp.reshape(xp.asarray([]), (2, 0)), xp=xp), + xp.asarray([[xp.nan, xp.nan], [xp.nan, xp.nan]]), + ) + + def test_combination(self): + x = xp.asarray([-2.1, -1, 4.3]) + y = xp.asarray([3, 1.1, 0.12]) + X = xp.stack((x, y), axis=0) + desired = xp.asarray([[11.71, -4.286], [-4.286, 2.144133]]) + assert_allclose(cov(X, xp=xp), desired, rtol=1e-6) + assert_allclose(cov(x, xp=xp), xp.asarray(11.71)) + assert_allclose(cov(y, xp=xp), xp.asarray(2.144133), rtol=1e-6) + + class TestKron: def test_basic(self): # Using 0-dimensional array