-
Notifications
You must be signed in to change notification settings - Fork 16
/
sparse_n_dense.R
53 lines (46 loc) · 1.69 KB
/
sparse_n_dense.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# Investigate matrix products involving mixed sparse and dense matrices.
# We compute 'crossprod(A)' two ways:
# 1. Using the usual dense linear algebra code path
# 2. Computing sparse and dense parts separately and then combining
#
# The perofrmance difference you will see is highly sensitive to your
# CPU architecture, the BLAS library that you're using, the size of
# the problem, and the ratio of sparse to dense portions.
# Set up an example:
m = 100000 # Number of rows
ns = 900 # Number of sparse columns
nd = 100 # Number of dense columns
n = nd + ns
# The 1st nd columns of A will be dense, the rest sparse (about 1% fill-in).
set.seed(1)
A = cbind(
matrix(rnorm(m*nd),nrow=m),
matrix(sample(0:1,prob=c(0.99,0.01),size=ns*m,replace=TRUE), nrow=m))
# Time forming the matrix cross product
t1 = proc.time()
ATA = crossprod(A)
t1 = proc.time() - t1
# Split A into dense and sparse parts
library("Matrix")
A_dense = Matrix(A[,1:nd],sparse=FALSE)
A_sparse = Matrix(A[,(nd+1):n],sparse=TRUE)
# Time split version
t2 = proc.time()
X11 = crossprod(A_dense)
X12 = crossprod(A_dense,A_sparse)
X21 = crossprod(A_sparse,A_dense)
X22 = crossprod(A_sparse)
X = matrix(0,nrow=n,ncol=n)
X[1:nd,1:nd] = as.matrix(X11)
X[1:nd,(nd+1):n] = as.matrix(X12)
X[(nd+1):n,1:nd] = as.matrix(X21)
X[(nd+1):n,(nd+1):n] = as.matrix(X22)
t2 = proc.time() - t2
# Print our timings:
cat("Time to compute the usual crossprod(A):\n")
print(t1)
cat("Time to compute crossprod(A) by splitting into sparse and dense parts:\n")
print(t2)
# Verify that we compute the same thing, within expected numerical accuracy:
cat("Frobenius norm of the difference of the two methods output:\n")
print(sqrt(norm(ATA - X,"F")))