From 59bcb1a079ec47fffa302ca5883caa37fa0165fa Mon Sep 17 00:00:00 2001
From: Benjamin Desef <projekter-git@yahoo.de>
Date: Wed, 6 Nov 2024 12:57:00 +0100
Subject: [PATCH 1/6] Fix #28

---
 sparse_dot_mkl/_dense_dense.py           | 11 +++++++---
 sparse_dot_mkl/_gram_matrix.py           |  8 ++++++--
 sparse_dot_mkl/_mkl_interface/_common.py | 26 +++++++++++++-----------
 sparse_dot_mkl/_sparse_dense.py          | 17 ++++++++--------
 sparse_dot_mkl/_sparse_sparse.py         |  9 +++++---
 sparse_dot_mkl/_sparse_sypr.py           |  4 ++--
 sparse_dot_mkl/_sparse_vector.py         | 22 ++++++++++++--------
 7 files changed, 58 insertions(+), 39 deletions(-)

diff --git a/sparse_dot_mkl/_dense_dense.py b/sparse_dot_mkl/_dense_dense.py
index 7730f3e..55f9354 100644
--- a/sparse_dot_mkl/_dense_dense.py
+++ b/sparse_dot_mkl/_dense_dense.py
@@ -48,7 +48,7 @@ def _dense_matmul(matrix_a, matrix_b, scalar=1., out=None, out_scalar=None):
     # The complex versions of these functions take void pointers instead of passed structs
     # So create a C struct if necessary to be passed by reference
     scalar = _mkl_scalar(scalar, complex_type, double_precision)
-    out_scalar = _mkl_scalar(out_scalar, complex_type, double_precision)
+    out_scalar = _mkl_scalar(0 if out is None else out_scalar, complex_type, double_precision)
 
     func(layout_a,
          111,
@@ -75,8 +75,13 @@ def _dense_dot_dense(matrix_a, matrix_b, cast=False, scalar=1., out=None, out_sc
     # Check for edge condition inputs which result in empty outputs
     if _empty_output_check(matrix_a, matrix_b):
         debug_print("Skipping multiplication because A (dot) B must yield an empty matrix")
-        final_dtype = np.float64 if matrix_a.dtype != matrix_b.dtype or matrix_a.dtype != np.float32 else np.float32
-        return _out_matrix((matrix_a.shape[0], matrix_b.shape[1]), final_dtype, out_arr=out)
+        output_arr = _out_matrix((matrix_a.shape[0], matrix_b.shape[1]),
+                                 _type_check(matrix_a, matrix_b, cast=cast, convert=False), out_arr=out)
+        if out is None or (out_scalar is not None and not out_scalar):
+            output_arr.fill(0)
+        elif out_scalar is not None:
+            output_arr *= out_scalar
+        return output_arr
 
     matrix_a, matrix_b = _type_check(matrix_a, matrix_b, cast=cast)
 
diff --git a/sparse_dot_mkl/_gram_matrix.py b/sparse_dot_mkl/_gram_matrix.py
index 79e556a..cbf6813 100644
--- a/sparse_dot_mkl/_gram_matrix.py
+++ b/sparse_dot_mkl/_gram_matrix.py
@@ -141,10 +141,14 @@ def _gram_matrix_sparse_to_dense(
 
     if _empty_output_check(matrix_a, matrix_a):
         _destroy_mkl_handle(sp_ref_a)
+        if out is None or (out_scalar is not None and not out_scalar):
+            output_arr.fill(0)
+        elif out_scalar is not None:
+            output_arr *= out_scalar
         return output_arr
 
     scalar = _mkl_scalar(scalar, complex_type, double_prec)
-    out_scalar = _mkl_scalar(out_scalar, complex_type, double_prec)
+    out_scalar = _mkl_scalar(0 if out is None else out_scalar, complex_type, double_prec)
 
     ret_val = func(
         _mkl_sp_transpose_ops[(not aat, complex_type)],
@@ -230,7 +234,7 @@ def _gram_matrix_dense_to_dense(
     # passed structs, so create a C struct if necessary to be passed by
     # reference
     scalar = _mkl_scalar(scalar, complex_type, double_precision)
-    out_scalar = _mkl_scalar(out_scalar, complex_type, double_precision)
+    out_scalar = _mkl_scalar(0 if out is None else out_scalar, complex_type, double_precision)
 
     func(
         layout_a,
diff --git a/sparse_dot_mkl/_mkl_interface/_common.py b/sparse_dot_mkl/_mkl_interface/_common.py
index e9d5893..74e4854 100644
--- a/sparse_dot_mkl/_mkl_interface/_common.py
+++ b/sparse_dot_mkl/_mkl_interface/_common.py
@@ -770,12 +770,14 @@ def _is_valid_dtype(matrix, complex_dtype=False, all_dtype=False):
         return matrix.dtype in NUMPY_FLOAT_DTYPES
 
 
-def _type_check(matrix_a, matrix_b=None, cast=False, allow_complex=True):
+def _type_check(matrix_a, matrix_b=None, cast=False, allow_complex=True, convert=True):
     """
     Make sure that both matrices are single precision floats or both are
     double precision floats.
     If not, convert to double precision floats if cast is True,
     or raise an error if cast is False
+    If convert is set to False, the resulting data type is returned without
+    any conversion happening.
     """
 
     _n_complex = _np.iscomplexobj(matrix_a) + _np.iscomplexobj(matrix_b)
@@ -785,17 +787,17 @@ def _type_check(matrix_a, matrix_b=None, cast=False, allow_complex=True):
 
     # If there's no matrix B and matrix A is valid dtype, return it
     if matrix_b is None and _is_valid_dtype(matrix_a, all_dtype=True):
-        return matrix_a
+        return matrix_a if convert else matrix_a.dtype
 
     # If matrix A is complex but not csingle or cdouble, and cast is True,
     # convert it to a cdouble
     elif matrix_b is None and cast and _n_complex == 1:
-        return _cast_to(matrix_a, _np.cdouble)
+        return _cast_to(matrix_a, _np.cdouble) if convert else _np.cdouble
 
     # If matrix A is real but not float32 or float64, and cast is True,
     # convert it to a float64
     elif matrix_b is None and cast:
-        return _cast_to(matrix_a, _np.float64)
+        return _cast_to(matrix_a, _np.float64) if convert else _np.float64
 
     # Raise an error - the dtype is invalid and cast is False
     elif matrix_b is None:
@@ -809,7 +811,7 @@ def _type_check(matrix_a, matrix_b=None, cast=False, allow_complex=True):
         _is_valid_dtype(matrix_a, all_dtype=True) and
         matrix_a.dtype == matrix_b.dtype
     ):
-        return matrix_a, matrix_b
+        return (matrix_a, matrix_b) if convert else matrix_a.dtype
 
     # If neither matrix is complex and cast is True, convert to float64s
     # and return them
@@ -818,7 +820,7 @@ def _type_check(matrix_a, matrix_b=None, cast=False, allow_complex=True):
             f"Recasting matrix data types {matrix_a.dtype} and "
             f"{matrix_b.dtype} to np.float64"
         )
-        return _cast_to(matrix_a, _np.float64), _cast_to(matrix_b, _np.float64)
+        return (_cast_to(matrix_a, _np.float64), _cast_to(matrix_b, _np.float64)) if convert else _np.float64
 
     # If both matrices are complex and cast is True, convert to cdoubles
     # and return them
@@ -827,7 +829,7 @@ def _type_check(matrix_a, matrix_b=None, cast=False, allow_complex=True):
             f"Recasting matrix data types {matrix_a.dtype} and "
             f"{matrix_b.dtype} to _np.cdouble"
         )
-        return _cast_to(matrix_a, _np.cdouble), _cast_to(matrix_b, _np.cdouble)
+        return (_cast_to(matrix_a, _np.cdouble), _cast_to(matrix_b, _np.cdouble)) if convert else _np.cdouble
 
     # Cast reals and complex matrices together
     elif (
@@ -838,7 +840,7 @@ def _type_check(matrix_a, matrix_b=None, cast=False, allow_complex=True):
         debug_print(
             f"Recasting matrix data type {matrix_b.dtype} to {matrix_a.dtype}"
         )
-        return matrix_a, _cast_to(matrix_b, matrix_a.dtype)
+        return (matrix_a, _cast_to(matrix_b, matrix_a.dtype)) if convert else matrix_a.dtype
 
     elif (
         cast and
@@ -848,14 +850,14 @@ def _type_check(matrix_a, matrix_b=None, cast=False, allow_complex=True):
         debug_print(
             f"Recasting matrix data type {matrix_a.dtype} to {matrix_b.dtype}"
         )
-        return _cast_to(matrix_a, matrix_b.dtype), matrix_b
+        return (_cast_to(matrix_a, matrix_b.dtype), matrix_b) if convert else matrix_b.dtype
 
     elif cast and _n_complex == 1:
         debug_print(
             f"Recasting matrix data type {matrix_a.dtype} and {matrix_b.dtype}"
             f" to np.cdouble"
         )
-        return _cast_to(matrix_a, _np.cdouble), _cast_to(matrix_b, _np.cdouble)
+        return (_cast_to(matrix_a, _np.cdouble), _cast_to(matrix_b, _np.cdouble)) if convert else _np.cdouble
 
     # If cast is False, can't cast anything together
     elif not cast:
@@ -884,7 +886,7 @@ def _mkl_scalar(scalar, complex_type, double_precision):
 
 def _out_matrix(shape, dtype, order="C", out_arr=None, out_t=False):
     """
-    Create an all-zero matrix or check to make sure that
+    Create an undefined matrix or check to make sure that
     the provided output array matches
 
     :param shape: Required output shape
@@ -905,7 +907,7 @@ def _out_matrix(shape, dtype, order="C", out_arr=None, out_t=False):
 
     # If there's no output array allocate a new array and return it
     if out_arr is None:
-        return _np.zeros(shape, dtype=dtype, order=order)
+        return _np.ndarray(shape, dtype=dtype, order=order)
 
     # Check and make sure the order is correct
     # Note 1d arrays have both flags set
diff --git a/sparse_dot_mkl/_sparse_dense.py b/sparse_dot_mkl/_sparse_dense.py
index c4200cf..958feca 100644
--- a/sparse_dot_mkl/_sparse_dense.py
+++ b/sparse_dot_mkl/_sparse_dense.py
@@ -106,7 +106,7 @@ def _sparse_dense_matmul(
 
         # Create a C struct if necessary to be passed
         scalar = _mkl_scalar(scalar, cplx, dbl)
-        out_scalar = _mkl_scalar(out_scalar, cplx, dbl)
+        out_scalar = _mkl_scalar(0 if out is None else out_scalar, cplx, dbl)
 
         ret_val = func(
             11 if transpose else 10,
@@ -165,14 +165,15 @@ def _sparse_dot_dense(
         debug_print(
             "Skipping multiplication because A (dot) B must yield empty matrix"
         )
-        final_dtype = (
-            np.float64
-            if matrix_a.dtype != matrix_b.dtype or matrix_a.dtype != np.float32
-            else np.float32
-        )
-        return _out_matrix(
-            (matrix_a.shape[0], matrix_b.shape[1]), final_dtype, out_arr=out
+        output_arr = _out_matrix(
+            (matrix_a.shape[0], matrix_b.shape[1]),
+            _type_check(matrix_a, matrix_b, cast=cast, convert=False), out_arr=out
         )
+        if out is None or (out_scalar is not None and not out_scalar):
+            output_arr.fill(0)
+        elif out_scalar is not None:
+            output_arr *= out_scalar
+        return output_arr
 
     matrix_a, matrix_b = _type_check(matrix_a, matrix_b, cast=cast)
 
diff --git a/sparse_dot_mkl/_sparse_sparse.py b/sparse_dot_mkl/_sparse_sparse.py
index 5fd70ab..c7523b4 100644
--- a/sparse_dot_mkl/_sparse_sparse.py
+++ b/sparse_dot_mkl/_sparse_sparse.py
@@ -169,16 +169,19 @@ def _sparse_dot_sparse(
     # Check for edge condition inputs which result in empty outputs
     if _empty_output_check(matrix_a, matrix_b):
 
+        final_dtype = _type_check(matrix_a, matrix_b, cast=cast, convert=False)
         if dense:
-            return _out_matrix(
+            output_arr = _out_matrix(
                 (matrix_a.shape[0], matrix_b.shape[1]),
-                matrix_a.dtype,
+                final_dtype,
                 out_arr=out
             )
+            output_arr.fill(0)
+            return output_arr
         else:
             return default_output(
                 (matrix_a.shape[0], matrix_b.shape[1]),
-                dtype=matrix_a.dtype
+                dtype=final_dtype
             )
 
     # Check dtypes
diff --git a/sparse_dot_mkl/_sparse_sypr.py b/sparse_dot_mkl/_sparse_sypr.py
index 054bdff..71109a8 100644
--- a/sparse_dot_mkl/_sparse_sypr.py
+++ b/sparse_dot_mkl/_sparse_sypr.py
@@ -69,8 +69,8 @@ def _sypr_sparse_A_dense_B(
         matrix_b,
         layout_b,
         ld_b,
-        float(out_scalar) if a_scalar is not None else 1.0,
-        float(out_scalar) if out_scalar is not None else 1.0,
+        float(a_scalar) if a_scalar is not None else 1.0,
+        0.0 if out is None else (float(out_scalar) if out_scalar is not None else 1.0),
         output_arr,
         output_layout,
         output_ld,
diff --git a/sparse_dot_mkl/_sparse_vector.py b/sparse_dot_mkl/_sparse_vector.py
index f28202d..ed1e653 100644
--- a/sparse_dot_mkl/_sparse_vector.py
+++ b/sparse_dot_mkl/_sparse_vector.py
@@ -56,14 +56,6 @@ def _sparse_dense_vector_mult(
     output_shape = matrix_a.shape[1] if transpose else matrix_a.shape[0]
     output_shape = (output_shape,) if vector_b.ndim == 1 else (output_shape, 1)
 
-    if _empty_output_check(matrix_a, vector_b):
-        final_dtype = (
-            np.float64
-            if matrix_a.dtype != vector_b.dtype or matrix_a.dtype != np.float32
-            else np.float32
-        )
-        return _out_matrix(output_shape, final_dtype, out_arr=out)
-
     mkl_a, dbl, cplx = _create_mkl_sparse(matrix_a)
     vector_b = vector_b.ravel()
 
@@ -75,7 +67,7 @@ def _sparse_dense_vector_mult(
 
     # Create a C struct if necessary to be passed
     scalar = _mkl_scalar(scalar, cplx, dbl)
-    out_scalar = _mkl_scalar(out_scalar, cplx, dbl)
+    out_scalar = _mkl_scalar(0 if out is None else out_scalar, cplx, dbl)
 
     output_arr = _out_matrix(
         output_shape,
@@ -135,6 +127,18 @@ def _sparse_dot_vector(
     """
 
     _sanity_check(mv_a, mv_b, allow_vector=True)
+
+    if _empty_output_check(mv_a, mv_b):
+        output_arr = _out_matrix(
+            (mv_a.shape[0],) if mv_b.ndim == 1 else (mv_a.shape[0], 1),
+            _type_check(mv_a, mv_b, cast=cast, convert=False), out_arr=out
+        )
+        if out is None or (out_scalar is not None and not out_scalar):
+            output_arr.fill(0)
+        elif out_scalar is not None:
+            output_arr *= out_scalar
+        return output_arr
+
     mv_a, mv_b = _type_check(mv_a, mv_b, cast=cast)
 
     if (

From 8d5ddf14e08e9006e5d97ac8ec070f1e340e3c31 Mon Sep 17 00:00:00 2001
From: Benjamin Desef <projekter-git@yahoo.de>
Date: Wed, 6 Nov 2024 14:10:12 +0100
Subject: [PATCH 2/6] Some more Gram matrix emptyness safety

---
 sparse_dot_mkl/_gram_matrix.py | 41 +++++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/sparse_dot_mkl/_gram_matrix.py b/sparse_dot_mkl/_gram_matrix.py
index cbf6813..2bbea66 100644
--- a/sparse_dot_mkl/_gram_matrix.py
+++ b/sparse_dot_mkl/_gram_matrix.py
@@ -283,6 +283,22 @@ def _gram_matrix(
     :rtype: scipy.sparse.csr_matrix, np.ndarray
     """
 
+    if _sps.issparse(matrix) and not (is_csr(matrix) or is_csc(matrix)):
+        raise ValueError(
+            "gram_matrix requires sparse matrix to be CSR or CSC format"
+        )
+    elif is_csc(matrix) and not cast:
+        raise ValueError(
+            "gram_matrix cannot use a CSC matrix unless cast=True"
+        )
+    elif out is not None and not dense:
+        raise ValueError(
+            "out argument cannot be used with sparse (dot) sparse "
+            "matrix multiplication"
+        )
+    elif out is not None and not isinstance(out, np.ndarray):
+        raise ValueError("out argument must be dense")
+
     # Check for edge condition inputs which result in empty outputs
     if _empty_output_check(matrix, matrix):
         debug_print(
@@ -294,8 +310,14 @@ def _gram_matrix(
             if transpose
             else (matrix.shape[0], matrix.shape[0])
         )
-        output_func = _sps.csr_matrix if _sps.isspmatrix(matrix) else np.zeros
-        return output_func(output_shape, dtype=matrix.dtype)
+        if out is None:
+            output_func = np.zeros if dense else _sps.csr_matrix
+            return output_func(output_shape, dtype=matrix.dtype)
+        elif out_scalar is not None and not out_scalar:
+            out.fill(0)
+        elif out_scalar is not None:
+            out *= out_scalar
+        return out
 
     if np.iscomplexobj(matrix):
         raise ValueError(
@@ -304,15 +326,7 @@ def _gram_matrix(
 
     matrix = _type_check(matrix, cast=cast)
 
-    if _sps.issparse(matrix) and not (is_csr(matrix) or is_csc(matrix)):
-        raise ValueError(
-            "gram_matrix requires sparse matrix to be CSR or CSC format"
-        )
-    elif is_csc(matrix) and not cast:
-        raise ValueError(
-            "gram_matrix cannot use a CSC matrix unless cast=True"
-        )
-    elif not _sps.issparse(matrix):
+    if not _sps.issparse(matrix):
         return _gram_matrix_dense_to_dense(
             matrix,
             aat=transpose,
@@ -326,11 +340,6 @@ def _gram_matrix(
             out=out,
             out_scalar=out_scalar
         )
-    elif out is not None:
-        raise ValueError(
-            "out argument cannot be used with sparse (dot) sparse "
-            "matrix multiplication"
-        )
     else:
         return _gram_matrix_sparse(
             matrix,

From 201fd1972e24ec78afa1756e93a4105e0c578826 Mon Sep 17 00:00:00 2001
From: Benjamin Desef <projekter-git@yahoo.de>
Date: Wed, 6 Nov 2024 14:34:11 +0100
Subject: [PATCH 3/6] Adjust Gram matrix tests: now the lower triangle is
 really undefined

---
 sparse_dot_mkl/tests/test_gram_matrix.py | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/sparse_dot_mkl/tests/test_gram_matrix.py b/sparse_dot_mkl/tests/test_gram_matrix.py
index 4ffdacd..d11fa63 100644
--- a/sparse_dot_mkl/tests/test_gram_matrix.py
+++ b/sparse_dot_mkl/tests/test_gram_matrix.py
@@ -24,11 +24,13 @@ def setUp(self):
         self.mat1_d = self.MATRIX_1.toarray()
 
         gram_ut = np.dot(self.MATRIX_1.toarray().T, self.MATRIX_1.toarray())
-        gram_ut[np.tril_indices(gram_ut.shape[0], k=-1)] = 0.0
+        self.tril = np.tril_indices(gram_ut.shape[0], k=-1)
+        gram_ut[self.tril] = 0.0
         self.gram_ut = gram_ut
 
         gram_ut_t = np.dot(self.MATRIX_1.toarray(), self.MATRIX_1.toarray().T)
-        gram_ut_t[np.tril_indices(gram_ut_t.shape[0], k=-1)] = 0.0
+        self.tril_t = np.tril_indices(gram_ut_t.shape[0], k=-1)
+        gram_ut_t[self.tril_t] = 0.0
         self.gram_ut_t = gram_ut_t
 
 
@@ -52,6 +54,7 @@ def test_gram_matrix_sp_single(self):
 
     def test_gram_matrix_d_single(self):
         mat2 = gram_matrix_mkl(self.mat1.astype(self.single_dtype), dense=True)
+        mat2[self.tril] = 0.0 # lower triangle is undefined
         np_almost_equal(mat2, self.gram_ut, decimal=5)
 
         mat2 = gram_matrix_mkl(
@@ -63,7 +66,7 @@ def test_gram_matrix_d_single(self):
             ),
             out_scalar=1.0,
         )
-        mat2[np.tril_indices(mat2.shape[0], k=-1)] = 0.0
+        mat2[self.tril] = 0.0 # lower triangle is undefined
         np_almost_equal(mat2, self.gram_ut, decimal=5)
 
         with self.assertRaises(ValueError):
@@ -78,6 +81,7 @@ def test_gram_matrix_d(self):
         print(self.mat1)
 
         mat2 = gram_matrix_mkl(self.mat1, dense=True)
+        mat2[self.tril] = 0.0 # lower triangle is undefined
         print(mat2 - self.gram_ut)
         print(mat2[np.tril_indices(mat2.shape[0], k=1)])
 
@@ -92,7 +96,7 @@ def test_gram_matrix_d(self):
             ),
             out_scalar=1.0,
         )
-        mat2[np.tril_indices(mat2.shape[0], k=-1)] = 0.0
+        mat2[self.tril] = 0.0 # lower triangle is undefined
         np_almost_equal(mat2, self.gram_ut)
 
     def test_gram_matrix_sp_t(self):
@@ -101,6 +105,7 @@ def test_gram_matrix_sp_t(self):
 
     def test_gram_matrix_d_t(self):
         mat2 = gram_matrix_mkl(self.mat1, dense=True, transpose=True)
+        mat2[self.tril_t] = 0.0 # lower triangle is undefined
         np_almost_equal(mat2, self.gram_ut_t)
 
     def test_gram_matrix_csc_sp(self):
@@ -110,6 +115,7 @@ def test_gram_matrix_csc_sp(self):
     def test_gram_matrix_csc_d(self):
         mat = self.mat1.tocsc()
         mat2 = gram_matrix_mkl(mat, dense=True, cast=True)
+        mat2[self.tril] = 0.0 # lower triangle is undefined
         np_almost_equal(mat.toarray(), self.mat1.toarray())
         np_almost_equal(mat2, self.gram_ut)
 
@@ -117,6 +123,7 @@ def test_gram_matrix_csc_d(self):
 class TestGramMatrixDense(TestGramMatrix):
     def test_gram_matrix_dd_double(self):
         mat2 = gram_matrix_mkl(self.mat1.toarray(), dense=True)
+        mat2[self.tril] = 0.0 # lower triangle is undefined
         np_almost_equal(mat2, self.gram_ut)
 
         mat2 = gram_matrix_mkl(
@@ -128,12 +135,14 @@ def test_gram_matrix_dd_double(self):
             ),
             out_scalar=1.0,
         )
+        mat2[self.tril] = 0.0 # lower triangle is undefined
         np_almost_equal(mat2, self.gram_ut)
 
     def test_gram_matrix_dd_single(self):
         mat2 = gram_matrix_mkl(
             self.mat1.astype(self.single_dtype).toarray(), dense=True
         )
+        mat2[self.tril] = 0.0 # lower triangle is undefined
         np_almost_equal(mat2, self.gram_ut, decimal=5)
 
         mat2 = gram_matrix_mkl(
@@ -145,6 +154,7 @@ def test_gram_matrix_dd_single(self):
             ),
             out_scalar=1.0,
         )
+        mat2[self.tril] = 0.0 # lower triangle is undefined
         np_almost_equal(mat2, self.gram_ut, decimal=5)
 
     def test_gram_matrix_dd_double_F(self):
@@ -152,6 +162,7 @@ def test_gram_matrix_dd_double_F(self):
             np.asarray(self.mat1.toarray(), order="F"),
             dense=True
         )
+        mat2[self.tril] = 0.0 # lower triangle is undefined
         np_almost_equal(mat2, self.gram_ut)
 
         mat2 = gram_matrix_mkl(
@@ -164,6 +175,7 @@ def test_gram_matrix_dd_double_F(self):
             ),
             out_scalar=1.0,
         )
+        mat2[self.tril] = 0.0 # lower triangle is undefined
         np_almost_equal(mat2, self.gram_ut)
 
     def test_gram_matrix_dd_single_F(self):
@@ -174,6 +186,7 @@ def test_gram_matrix_dd_single_F(self):
             ),
             dense=True,
         )
+        mat2[self.tril] = 0.0 # lower triangle is undefined
         np_almost_equal(mat2, self.gram_ut, decimal=5)
 
         mat2 = gram_matrix_mkl(
@@ -189,6 +202,7 @@ def test_gram_matrix_dd_single_F(self):
             ),
             out_scalar=1.0,
         )
+        mat2[self.tril] = 0.0 # lower triangle is undefined
         np_almost_equal(mat2, self.gram_ut, decimal=5)
 
 

From 1b81a3cf2db654f944029b1aca23e3d6227ca6b1 Mon Sep 17 00:00:00 2001
From: Benjamin Desef <projekter-git@yahoo.de>
Date: Wed, 6 Nov 2024 14:34:16 +0100
Subject: [PATCH 4/6] Typo

---
 sparse_dot_mkl/_gram_matrix.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sparse_dot_mkl/_gram_matrix.py b/sparse_dot_mkl/_gram_matrix.py
index 2bbea66..878cb85 100644
--- a/sparse_dot_mkl/_gram_matrix.py
+++ b/sparse_dot_mkl/_gram_matrix.py
@@ -93,7 +93,7 @@ def _gram_matrix_sparse(
 
 
 # Dict keyed by ('double_precision_bool', 'complex_bool')
-_mkl_skryd_funcs = {
+_mkl_syrkd_funcs = {
     (False, False): MKL._mkl_sparse_s_syrkd,
     (True, False): MKL._mkl_sparse_d_syrkd,
     (False, True): MKL._mkl_sparse_c_syrkd,
@@ -129,7 +129,7 @@ def _gram_matrix_sparse_to_dense(
     _order_mkl_handle(sp_ref_a)
 
     out_dtype = _output_dtypes[(double_prec, complex_type)]
-    func = _mkl_skryd_funcs[(double_prec, complex_type)]
+    func = _mkl_syrkd_funcs[(double_prec, complex_type)]
 
     out_dim = matrix_a.shape[0 if aat else 1]
 

From 4b7379abec91e56d25498e19ce66abd8aa0e7ff2 Mon Sep 17 00:00:00 2001
From: Benjamin Desef <projekter-git@yahoo.de>
Date: Thu, 7 Nov 2024 13:24:20 +0100
Subject: [PATCH 5/6] Always check for invalid format first

---
 sparse_dot_mkl/_sparse_vector.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/sparse_dot_mkl/_sparse_vector.py b/sparse_dot_mkl/_sparse_vector.py
index ed1e653..4fd62f7 100644
--- a/sparse_dot_mkl/_sparse_vector.py
+++ b/sparse_dot_mkl/_sparse_vector.py
@@ -126,6 +126,14 @@ def _sparse_dot_vector(
     :rtype: np.ndarray
     """
 
+    if (
+        not _is_allowed_sparse_format(mv_a) or
+        not _is_allowed_sparse_format(mv_b)
+    ):
+        raise ValueError(
+            "Only CSR, CSC, and BSR-type sparse matrices are supported"
+        )
+
     _sanity_check(mv_a, mv_b, allow_vector=True)
 
     if _empty_output_check(mv_a, mv_b):
@@ -141,14 +149,7 @@ def _sparse_dot_vector(
 
     mv_a, mv_b = _type_check(mv_a, mv_b, cast=cast)
 
-    if (
-        not _is_allowed_sparse_format(mv_a) or
-        not _is_allowed_sparse_format(mv_b)
-    ):
-        raise ValueError(
-            "Only CSR, CSC, and BSR-type sparse matrices are supported"
-        )
-    elif _is_dense_vector(mv_b):
+    if _is_dense_vector(mv_b):
         return _sparse_dense_vector_mult(
             mv_a,
             mv_b,

From 4f168ebcd44ecfa5cb6ac762c2e9ac2cc18b1791 Mon Sep 17 00:00:00 2001
From: asistradition <cj59@nyu.edu>
Date: Thu, 7 Nov 2024 13:13:08 -0500
Subject: [PATCH 6/6] Retain gram_matrix behavior

---
 sparse_dot_mkl/_gram_matrix.py           | 33 +++++++++++------
 sparse_dot_mkl/_mkl_interface/_common.py | 13 ++++++-
 sparse_dot_mkl/tests/test_gram_matrix.py | 47 ++++++++++--------------
 3 files changed, 52 insertions(+), 41 deletions(-)

diff --git a/sparse_dot_mkl/_gram_matrix.py b/sparse_dot_mkl/_gram_matrix.py
index 878cb85..3c7a0c2 100644
--- a/sparse_dot_mkl/_gram_matrix.py
+++ b/sparse_dot_mkl/_gram_matrix.py
@@ -136,7 +136,10 @@ def _gram_matrix_sparse_to_dense(
     output_arr = _out_matrix(
         (out_dim, out_dim),
         out_dtype,
-        order="C", out_arr=out)
+        order="C",
+        out_arr=out,
+        initialize_zeros=True
+    )
     _, output_ld = _get_numpy_layout(output_arr)
 
     if _empty_output_check(matrix_a, matrix_a):
@@ -146,9 +149,12 @@ def _gram_matrix_sparse_to_dense(
         elif out_scalar is not None:
             output_arr *= out_scalar
         return output_arr
+    
+    if out is None:
+        out_scalar = 0.
 
     scalar = _mkl_scalar(scalar, complex_type, double_prec)
-    out_scalar = _mkl_scalar(0 if out is None else out_scalar, complex_type, double_prec)
+    out_scalar = _mkl_scalar(out_scalar, complex_type, double_prec)
 
     ret_val = func(
         _mkl_sp_transpose_ops[(not aat, complex_type)],
@@ -169,7 +175,7 @@ def _gram_matrix_sparse_to_dense(
     # matrix. This stupid thing only happens with specific flags
     # I could probably leave it but it's pretty annoying
 
-    if not aat and out is None and not complex_type:
+    if not aat and out is None:
         output_arr[np.tril_indices(output_arr.shape[0], k=-1)] = 0.0
 
     return output_arr
@@ -212,6 +218,13 @@ def _gram_matrix_dense_to_dense(
     :rtype: numpy.ndarray
     """
 
+    if aat and np.iscomplexobj(matrix_a):
+        raise ValueError(
+            "transpose=True with dense complex data currently "
+            "fails with an Intel oneMKL ERROR: "
+            "Parameter 3 was incorrect on entry to cblas_csyrk"
+        )
+
     # Get dimensions
     n, k = matrix_a.shape if aat else matrix_a.shape[::-1]
 
@@ -227,15 +240,16 @@ def _gram_matrix_dense_to_dense(
         (n, n),
         out_dtype,
         order="C" if layout_a == LAYOUT_CODE_C else "F",
-        out_arr=out
+        out_arr=out,
+        initialize_zeros=True
     )
 
     # The complex versions of these functions take void pointers instead of
     # passed structs, so create a C struct if necessary to be passed by
     # reference
     scalar = _mkl_scalar(scalar, complex_type, double_precision)
-    out_scalar = _mkl_scalar(0 if out is None else out_scalar, complex_type, double_precision)
-
+    out_scalar = _mkl_scalar(out_scalar, complex_type, double_precision)
+    
     func(
         layout_a,
         MKL_UPPER,
@@ -247,7 +261,7 @@ def _gram_matrix_dense_to_dense(
         ld_a,
         out_scalar if not complex_type else _ctypes.byref(scalar),
         output_arr,
-        n,
+        n
     )
 
     return output_arr
@@ -319,11 +333,6 @@ def _gram_matrix(
             out *= out_scalar
         return out
 
-    if np.iscomplexobj(matrix):
-        raise ValueError(
-            "gram_matrix_mkl does not support complex datatypes"
-        )
-
     matrix = _type_check(matrix, cast=cast)
 
     if not _sps.issparse(matrix):
diff --git a/sparse_dot_mkl/_mkl_interface/_common.py b/sparse_dot_mkl/_mkl_interface/_common.py
index 74e4854..02fcfe7 100644
--- a/sparse_dot_mkl/_mkl_interface/_common.py
+++ b/sparse_dot_mkl/_mkl_interface/_common.py
@@ -884,7 +884,14 @@ def _mkl_scalar(scalar, complex_type, double_precision):
         return float(scalar)
 
 
-def _out_matrix(shape, dtype, order="C", out_arr=None, out_t=False):
+def _out_matrix(
+    shape,
+    dtype,
+    order="C",
+    out_arr=None,
+    out_t=False,
+    initialize_zeros=False
+):
     """
     Create an undefined matrix or check to make sure that
     the provided output array matches
@@ -906,7 +913,9 @@ def _out_matrix(shape, dtype, order="C", out_arr=None, out_t=False):
     out_t = False if out_t is None else out_t
 
     # If there's no output array allocate a new array and return it
-    if out_arr is None:
+    if out_arr is None and initialize_zeros:
+        return _np.zeros(shape, dtype=dtype, order=order)
+    elif out_arr is None:
         return _np.ndarray(shape, dtype=dtype, order=order)
 
     # Check and make sure the order is correct
diff --git a/sparse_dot_mkl/tests/test_gram_matrix.py b/sparse_dot_mkl/tests/test_gram_matrix.py
index d11fa63..ecd6e02 100644
--- a/sparse_dot_mkl/tests/test_gram_matrix.py
+++ b/sparse_dot_mkl/tests/test_gram_matrix.py
@@ -24,13 +24,11 @@ def setUp(self):
         self.mat1_d = self.MATRIX_1.toarray()
 
         gram_ut = np.dot(self.MATRIX_1.toarray().T, self.MATRIX_1.toarray())
-        self.tril = np.tril_indices(gram_ut.shape[0], k=-1)
-        gram_ut[self.tril] = 0.0
+        gram_ut[np.tril_indices(gram_ut.shape[0], k=-1)] = 0.0
         self.gram_ut = gram_ut
 
         gram_ut_t = np.dot(self.MATRIX_1.toarray(), self.MATRIX_1.toarray().T)
-        self.tril_t = np.tril_indices(gram_ut_t.shape[0], k=-1)
-        gram_ut_t[self.tril_t] = 0.0
+        gram_ut_t[np.tril_indices(gram_ut_t.shape[0], k=-1)] = 0.0
         self.gram_ut_t = gram_ut_t
 
 
@@ -54,7 +52,6 @@ def test_gram_matrix_sp_single(self):
 
     def test_gram_matrix_d_single(self):
         mat2 = gram_matrix_mkl(self.mat1.astype(self.single_dtype), dense=True)
-        mat2[self.tril] = 0.0 # lower triangle is undefined
         np_almost_equal(mat2, self.gram_ut, decimal=5)
 
         mat2 = gram_matrix_mkl(
@@ -66,7 +63,7 @@ def test_gram_matrix_d_single(self):
             ),
             out_scalar=1.0,
         )
-        mat2[self.tril] = 0.0 # lower triangle is undefined
+        mat2[np.tril_indices(mat2.shape[0], k=-1)] = 0.0
         np_almost_equal(mat2, self.gram_ut, decimal=5)
 
         with self.assertRaises(ValueError):
@@ -78,12 +75,7 @@ def test_gram_matrix_d_single(self):
             )
 
     def test_gram_matrix_d(self):
-        print(self.mat1)
-
         mat2 = gram_matrix_mkl(self.mat1, dense=True)
-        mat2[self.tril] = 0.0 # lower triangle is undefined
-        print(mat2 - self.gram_ut)
-        print(mat2[np.tril_indices(mat2.shape[0], k=1)])
 
         np_almost_equal(mat2, self.gram_ut)
 
@@ -96,7 +88,7 @@ def test_gram_matrix_d(self):
             ),
             out_scalar=1.0,
         )
-        mat2[self.tril] = 0.0 # lower triangle is undefined
+        mat2[np.tril_indices(mat2.shape[0], k=-1)] = 0.0
         np_almost_equal(mat2, self.gram_ut)
 
     def test_gram_matrix_sp_t(self):
@@ -105,7 +97,6 @@ def test_gram_matrix_sp_t(self):
 
     def test_gram_matrix_d_t(self):
         mat2 = gram_matrix_mkl(self.mat1, dense=True, transpose=True)
-        mat2[self.tril_t] = 0.0 # lower triangle is undefined
         np_almost_equal(mat2, self.gram_ut_t)
 
     def test_gram_matrix_csc_sp(self):
@@ -115,7 +106,6 @@ def test_gram_matrix_csc_sp(self):
     def test_gram_matrix_csc_d(self):
         mat = self.mat1.tocsc()
         mat2 = gram_matrix_mkl(mat, dense=True, cast=True)
-        mat2[self.tril] = 0.0 # lower triangle is undefined
         np_almost_equal(mat.toarray(), self.mat1.toarray())
         np_almost_equal(mat2, self.gram_ut)
 
@@ -123,7 +113,6 @@ def test_gram_matrix_csc_d(self):
 class TestGramMatrixDense(TestGramMatrix):
     def test_gram_matrix_dd_double(self):
         mat2 = gram_matrix_mkl(self.mat1.toarray(), dense=True)
-        mat2[self.tril] = 0.0 # lower triangle is undefined
         np_almost_equal(mat2, self.gram_ut)
 
         mat2 = gram_matrix_mkl(
@@ -135,14 +124,12 @@ def test_gram_matrix_dd_double(self):
             ),
             out_scalar=1.0,
         )
-        mat2[self.tril] = 0.0 # lower triangle is undefined
         np_almost_equal(mat2, self.gram_ut)
 
     def test_gram_matrix_dd_single(self):
         mat2 = gram_matrix_mkl(
             self.mat1.astype(self.single_dtype).toarray(), dense=True
         )
-        mat2[self.tril] = 0.0 # lower triangle is undefined
         np_almost_equal(mat2, self.gram_ut, decimal=5)
 
         mat2 = gram_matrix_mkl(
@@ -154,7 +141,6 @@ def test_gram_matrix_dd_single(self):
             ),
             out_scalar=1.0,
         )
-        mat2[self.tril] = 0.0 # lower triangle is undefined
         np_almost_equal(mat2, self.gram_ut, decimal=5)
 
     def test_gram_matrix_dd_double_F(self):
@@ -162,7 +148,6 @@ def test_gram_matrix_dd_double_F(self):
             np.asarray(self.mat1.toarray(), order="F"),
             dense=True
         )
-        mat2[self.tril] = 0.0 # lower triangle is undefined
         np_almost_equal(mat2, self.gram_ut)
 
         mat2 = gram_matrix_mkl(
@@ -175,7 +160,6 @@ def test_gram_matrix_dd_double_F(self):
             ),
             out_scalar=1.0,
         )
-        mat2[self.tril] = 0.0 # lower triangle is undefined
         np_almost_equal(mat2, self.gram_ut)
 
     def test_gram_matrix_dd_single_F(self):
@@ -186,7 +170,6 @@ def test_gram_matrix_dd_single_F(self):
             ),
             dense=True,
         )
-        mat2[self.tril] = 0.0 # lower triangle is undefined
         np_almost_equal(mat2, self.gram_ut, decimal=5)
 
         mat2 = gram_matrix_mkl(
@@ -202,7 +185,6 @@ def test_gram_matrix_dd_single_F(self):
             ),
             out_scalar=1.0,
         )
-        mat2[self.tril] = 0.0 # lower triangle is undefined
         np_almost_equal(mat2, self.gram_ut, decimal=5)
 
 
@@ -227,21 +209,32 @@ def setUp(self):
         self.gram_ut_t = gram_ut_t
 
 
-@unittest.skip
 class TestGramMatrixSparseComplex(_TestGramMatrixComplex, TestGramMatrixSparse):
     pass
 
-
-@unittest.skip
 class TestGramMatrixDenseComplex(_TestGramMatrixComplex, TestGramMatrixDense):
-    pass
 
+    @unittest.skip
+    def test_gram_matrix_dd_double(self):
+        pass
+
+    @unittest.skip
+    def test_gram_matrix_dd_double_F(self):
+        pass
+
+    @unittest.skip
+    def test_gram_matrix_dd_single(self):
+        pass
+
+    @unittest.skip
+    def test_gram_matrix_dd_single_F(self):
+        pass
 
 try:
     from scipy.sparse import (
         csr_array
     )
-
+    @unittest.skip
     class TestGramMatrixSparseArray(TestGramMatrixSparse):
         sparse_func = csr_array