-
Notifications
You must be signed in to change notification settings - Fork 0
/
matrix_mult_omp.c
executable file
·126 lines (102 loc) · 2.52 KB
/
matrix_mult_omp.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#include <errno.h>
#include <limits.h>
#include <math.h>
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
// Small tolerance constant; not referenced by any code visible in this file
#define EPS (1e-10)
// Seeds passed to generate_matrix() for the two input matrices A and B
#define SEED_A (1)
#define SEED_B (2)
// Matrix dimension (matrices are N x N); assigned in main() from argv[1]
int N;
// Fills the size x size row-major matrix M with values derived from the
// element indices and seed
void generate_matrix(float* M, int size, int seed);
// Computes C = A * B for size x size row-major matrices (straightforward
// triple loop, rows distributed with OpenMP)
void mult_square_matrices(float* A, float* B, float* C, int size);
// Computes the same product as mult_square_matrices(), reading B through its
// transpose so the innermost loop walks both operands row-wise
void mult_square_matrices_opt(float* A, float* B, float* C, int size);
// Prints the size x size matrix M to stdout, one row per line
void print_matrix(float* M, int size);
// Transposes the size x size matrix M in place
void transpose(float* M, int size);
/*
 * Entry point: generates two N x N matrices, multiplies them with
 * mult_square_matrices_opt() and prints the wall-clock time spent in the
 * multiplication only (allocation and generation are not timed).
 *
 * Usage: <program> N
 *   N - matrix dimension: a positive decimal integer small enough that
 *       N * N still fits in an int, because the kernels index with int.
 *
 * Returns 0 on success and EXIT_FAILURE when the argument is missing or
 * invalid, or when the matrices cannot be allocated.
 */
int main(int argc, char** argv)
{
    float *A, *B, *C;
    double start_time, finish_time;
    double exec_time;
    char *end;
    long requested;
    size_t count;

    // argv[1] is mandatory; reading it without this check would crash
    if (argc < 2) {
        fprintf(stderr, "Usage: %s N\n",
                (argc > 0 && argv[0] != NULL) ? argv[0] : "matrix_mult_omp");
        return EXIT_FAILURE;
    }

    // strtol (unlike atoi) lets us reject garbage, trailing junk and overflow
    errno = 0;
    requested = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || errno == ERANGE
        || requested <= 0
        || requested > INT_MAX / requested) { // N * N must not overflow int
        fprintf(stderr, "Invalid matrix size: %s\n", argv[1]);
        return EXIT_FAILURE;
    }
    N = (int) requested;

    // Allocate memory for all matrices; the element count is computed in
    // size_t so the multiplication itself cannot overflow
    count = (size_t) N * (size_t) N;
    A = calloc(count, sizeof *A);
    B = calloc(count, sizeof *B);
    C = calloc(count, sizeof *C);
    if (A == NULL || B == NULL || C == NULL) {
        fprintf(stderr, "Failed to allocate three %d x %d matrices\n", N, N);
        free(A); free(B); free(C);
        return EXIT_FAILURE;
    }

    // Generate matrices
    generate_matrix(A, N, SEED_A);
    generate_matrix(B, N, SEED_B);

    // Multiply matrices, timing only the multiplication
    start_time = omp_get_wtime();
    mult_square_matrices_opt(A, B, C, N);
    finish_time = omp_get_wtime();

    // Get execution time
    exec_time = (finish_time - start_time);

    // Print execution time
    printf("Multiplication time: %.5lf\n", exec_time);

    // Print result (debugging aid, disabled by default)
    //print_matrix(A, N);
    //print_matrix(B, N);
    //print_matrix(C, N);

    free(A); free(B); free(C);
    return 0;
}
/*
 * Straightforward parallel product of two square matrices: C = A * B.
 *
 * A, B and C are size x size matrices stored row-major in flat arrays.
 * Every element of C is overwritten. Rows of C are independent of each
 * other, so the outer loop is shared among the OpenMP threads.
 */
void mult_square_matrices(float* A, float* B, float* C, int size)
{
    // Loop-scoped variables are private to each thread automatically
    #pragma omp parallel for
    for (int row = 0; row < size; ++row) {
        float *out = C + row * size;
        float *lhs = A + row * size;

        for (int col = 0; col < size; ++col) {
            out[col] = 0.0;
            // Dot product of row `row` of A with column `col` of B
            for (int inner = 0; inner < size; ++inner) {
                out[col] += lhs[inner] * B[inner * size + col];
            }
        }
    }
}
/*
 * Stores the transpose of the size x size row-major matrix src in dst.
 * src and dst must not overlap.
 */
static void transpose_into(const float* src, float* dst, int size)
{
    for (int i = 0; i < size; ++i)
        for (int j = 0; j < size; ++j)
            dst[j*size + i] = src[i*size + j];
}

/*
 * Cache-friendly parallel product of two square matrices: C = A * B.
 *
 * A, B and C are size x size matrices stored row-major in flat arrays.
 * The product is computed against a transposed copy of B so that the
 * innermost loop reads both operands sequentially.
 *
 * B itself is never modified: the transpose lives in a private scratch
 * buffer. Transposing B in place would give a wrong result when A and B
 * are the same buffer (e.g. squaring a matrix) and would expose a
 * temporarily transposed B to any concurrent reader.
 *
 * A and B may be the same buffer; C must not overlap A or B.
 * If the scratch buffer cannot be allocated, the function falls back to
 * reading B column-wise, which is slower but yields the same product.
 * Does nothing when size <= 0.
 */
void mult_square_matrices_opt(float* A, float* B, float* C, int size)
{
    float *Bt;

    if (size <= 0)
        return;

    // calloc also guards the element-count * element-size multiplication
    Bt = calloc((size_t) size * (size_t) size, sizeof *Bt);

    if (Bt == NULL) {
        // No scratch memory available: use B directly with strided access
        #pragma omp parallel for
        for (int i = 0; i < size; ++i) {
            for (int j = 0; j < size; ++j) {
                float sum = 0.0f;
                for (int k = 0; k < size; ++k)
                    sum += A[i*size + k] * B[k*size + j];
                C[i*size + j] = sum;
            }
        }
        return;
    }

    transpose_into(B, Bt, size);

    // Rows of C are independent; loop-scoped variables are thread-private
    #pragma omp parallel for
    for (int i = 0; i < size; ++i) {
        float *bufC = &(C[i*size]);
        const float *bufA = &(A[i*size]);
        for (int j = 0; j < size; ++j) {
            // Row j of Bt is column j of B
            const float *bufB = &(Bt[j*size]);
            // Accumulate locally and store once per output element
            float sum = 0.0f;
            for (int k = 0; k < size; ++k)
                sum += bufA[k] * bufB[k];
            bufC[j] = sum;
        }
    }

    free(Bt);
}
/*
 * Transposes the size x size row-major matrix M in place.
 *
 * Each element above the main diagonal is exchanged with its mirror image
 * below the diagonal; diagonal elements stay where they are.
 */
void transpose(float* M, int size)
{
    for (int row = 0; row < size; ++row) {
        for (int col = row + 1; col < size; ++col) {
            float *upper = &M[row * size + col];
            float *lower = &M[col * size + row];
            const float held = *upper;

            *upper = *lower;
            *lower = held;
        }
    }
}
/*
 * Fills the size x size row-major matrix M with deterministic values.
 *
 * Element (row, col) is
 *   (row + col + seed) / (row + col + 1) * ((row XOR seed) + 0.55)
 * where the quotient is evaluated in float and the final product in double
 * before being stored as float.
 */
void generate_matrix(float* M, int size, int seed)
{
    for (int row = 0; row < size; ++row) {
        // This factor depends on the row only, so compute it once per row
        const double row_scale = (row ^ seed) + 0.55;
        float *out = M + row * size;

        for (int col = 0; col < size; ++col) {
            const int diag = row + col;
            const float ratio = ((float)(diag + seed)) / (diag + 1);

            out[col] = ratio * row_scale;
        }
    }
}
/*
 * Writes the size x size row-major matrix M to stdout.
 *
 * The output is an opening "(" line, then one line per matrix row with each
 * value rounded to zero decimals and followed by a tab, then a closing ")"
 * line.
 */
void print_matrix(float* M, int size)
{
    printf("(\n");
    for (int row = 0; row < size; ++row) {
        const float *line = M + row * size;

        for (int col = 0; col < size; ++col) {
            printf("%.0f\t", line[col]);
        }
        printf("\n");
    }
    printf(")\n");
}