-
Notifications
You must be signed in to change notification settings - Fork 2
/
gemmManagedOutOfCore.cu
52 lines (44 loc) · 1.13 KB
/
gemmManagedOutOfCore.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#include <cuda_runtime.h>
#include <cublas.h>

#include "common.hh"
/*
 * SGEMM benchmark on CUDA managed memory.
 *
 * Allocates three N x N float matrices with cudaMallocManaged, fills A and B
 * from the host, computes C = alpha * A * B + beta * C with cublasSgemm
 * (alpha = 1, beta = 0), and reports the duration of each phase via log().
 *
 * Usage: <program> [N]
 *   N  matrix dimension, parsed with checked_strtosize(); defaults to 16000
 *      (16000 * 16000 * sizeof(float) is about 1 GB per matrix).
 *
 * Returns 0 on success, EXIT_FAILURE when N does not fit the int dimension
 * parameters of cublasSgemm. CUDA/cuBLAS status codes are passed to check()
 * from common.hh (NOTE(review): presumably it aborts on failure - confirm).
 */
int
main(int argc, char *argv[])
{
    size_t N = 16000;
    clock_t start, end;
    cublasHandle_t handle;
    float *a, *b, *c;
    const float alpha = 1;
    const float beta = 0;
    size_t count, nn;
    int dim;

    if (argc == 2) {
        N = checked_strtosize(argv[1]);
    }

    /* cublasSgemm takes its dimensions and leading dimensions as int, so
     * reject anything that would be silently truncated by the conversion. */
    if (N > (size_t)INT_MAX) {
        fprintf(stderr, "N=%zu exceeds the maximum dimension supported by cublasSgemm (%d)\n",
                N, INT_MAX);
        return EXIT_FAILURE;
    }
    dim = (int)N;

    nn = checked_mul(N, N);                  /* number of elements per matrix */
    count = checked_mul(nn, sizeof(float));  /* number of bytes per matrix */

    check(cublasCreate(&handle));

    /* Phase 1: managed allocation plus host-side initialisation of A and B. */
    start = clock();
    check(cudaMallocManaged(&a, count));
    check(cudaMallocManaged(&b, count));
    check(cudaMallocManaged(&c, count));
    /* Iterate over nn (the value the allocation size was derived from)
     * instead of recomputing an unchecked N*N. */
    for (size_t i = 0; i < nn; i++) {
        /* Divide in double, then narrow explicitly to float. */
        a[i] = (float)(i / 37.0);
        b[i] = (float)(i / 101.0);
    }
    end = clock();
    log("host: MallocManaged+init", start, end);

    /* Phase 2: the matrix product. C is not initialised; with beta == 0
     * cuBLAS does not read it as input. */
    start = clock();
    check(cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N,
                      dim, dim, dim,
                      &alpha,
                      a, dim,
                      b, dim,
                      &beta,
                      c, dim));
    /* Wait for the device so the timing covers the whole computation. */
    check(cudaDeviceSynchronize());
    end = clock();
    log("cublasSgemm", start, end);

    /* Release the managed buffers and the cuBLAS handle (outside the timed
     * regions so the reported numbers are unaffected). */
    check(cudaFree(a));
    check(cudaFree(b));
    check(cudaFree(c));
    check(cublasDestroy(handle));

    return 0;
}