-
Notifications
You must be signed in to change notification settings - Fork 1
/
maxvol_gpu.py
66 lines (52 loc) · 2.18 KB
/
maxvol_gpu.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import cupy as cp
from cupyx.scipy.linalg import lu, solve_triangular
from time import time
"""
The following code comes from https://github.com/AndreiChertkov/teneva
"""
def maxvol(A, e, k):
A = cp.array(A)
"""Compute the maximal-volume submatrix for given tall matrix.
Args:
A (np.ndarray): tall matrix of the shape [n, r] (n > r).
e (float): accuracy parameter (should be >= 1). If the parameter is
equal to 1, then the maximum number of iterations will be performed
until true convergence is achieved. If the value is greater than
one, the algorithm will complete its work faster, but the accuracy
will be slightly lower (in most cases, the optimal value is within
the range of 1.01 - 1.1).
k (int): maximum number of iterations (should be >= 1).
Returns:
(np.ndarray, np.ndarray): the row numbers I containing the maximal
volume submatrix in the form of 1D array of length r and coefficient
matrix B in the form of 2D array of shape [n, r], such that
A = B A[I, :] and A (A[I, :])^{-1} = B.
Note:
The description of the basic implementation of this algorithm is
presented in the work: Goreinov S., Oseledets, I., Savostyanov, D.,
Tyrtyshnikov, E., Zamarashkin, N. "How to find a good submatrix".
Matrix Methods: Theory, Algorithms And Applications: Dedicated to the Memory of Gene Golub (2010): 247-256.
"""
n, r = A.shape
if n <= r:
raise ValueError('Input matrix should be "tall"')
P, L, U = lu(A, check_finite=False)
I = P[:, :r].argmax(axis=0)
Q = solve_triangular(U, A.T, trans=1, check_finite=False)
B = solve_triangular(
L[:r, :], Q, trans=1, check_finite=False, unit_diagonal=True, lower=True
).T
t0 = time()
for iter in range(k):
i, j = cp.divmod(cp.abs(B).argmax(), r)
E = cp.abs(B[i, j])
if E <= e:
v = iter / (time() - t0)
print(f"Maxvol Speed: {int(v)} iters/s")
break
I[j] = i
bj = B[:, j]
bi = B[i, :].copy()
bi[j] -= 1.0
B -= cp.outer(bj, bi / B[i, j])
return I.get()