-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #26 from nhatdongdang/feat/gpu-multithread
Gpu multithread
- Loading branch information
Showing
10 changed files
with
321 additions
and
103 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Builds every CUDA source in $(SRC_DIR) into $(BIN_DIR)/<name>.exe, linking
# each one against the shared benchmark.cu driver.
compile = nvcc -O3 -arch=sm_75 --use_fast_math
SRC_DIR := versions
BIN_DIR := bin
SRC_FILES := $(wildcard $(SRC_DIR)/*.cu)
# Name the targets after the files that are actually produced (<name>.exe) so
# make can see them on disk.
EXECUTABLES := $(patsubst $(SRC_DIR)/%.cu,$(BIN_DIR)/%.exe,$(SRC_FILES))

# These targets are commands, not files; never let a same-named file hide them.
.PHONY: all clean plot

# Always start from an empty $(BIN_DIR) so removed versions leave no stale
# executables behind for plot.py to pick up.
all: clean $(EXECUTABLES)

clean:
	rm -f -r $(BIN_DIR)
	mkdir -p $(BIN_DIR)

# Each version is compiled together with the driver; the shared header is
# listed so edits to it are noticed as well.
$(BIN_DIR)/%.exe: $(SRC_DIR)/%.cu benchmark.cu template.cuh
	$(compile) $< benchmark.cu -o $@

plot: all
	python3 ./plot.py
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#include "template.cuh" | ||
#include <stdio.h> | ||
#include <time.h> | ||
|
||
/*
 * Benchmark driver: runs the linked version's time() and prints the elapsed
 * seconds to stdout with "%f" (no trailing newline).
 *
 * argv[1], when present, is the iteration count (parsed with atol);
 * otherwise 100000 iterations are used.
 *
 * Returns 0 on success, 1 if the requested count does not fit in the int
 * that time() accepts.
 */
int main(int argc, char* argv[]) {
    long n = 100000;  // default iteration count
    if (argc > 1) {
        n = atol(argv[1]);
    }

    // time() takes an int: refuse counts that would be silently truncated
    // instead of benchmarking a different number of iterations than asked.
    if ((long)(int)n != n) {
        fprintf(stderr, "iteration count %ld does not fit in an int\n", n);
        return 1;
    }

    printf("%f", time((int)n));
    return 0;
}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import os | ||
import subprocess | ||
import matplotlib.pyplot as plt | ||
|
||
# Rebuild every benchmark executable before measuring. Stop right away if the
# build fails instead of timing stale or missing binaries.
result = subprocess.run(['make'], capture_output=True, text=True)
if result.returncode != 0:
    raise SystemExit(f"make failed with exit code {result.returncode}:\n{result.stderr}")

# Define the folder containing the executables
folder_path = './bin'  # Change this to your bin folder path

# Define the input sizes to test
start = 10000
end = 10000
step = 100000

input_sizes = list(range(start, end + 1, step))

# Initialize a dictionary to store runtimes for each executable.
# Sorted so the run order and the legend order are the same on every machine.
runtimes = {
    exe: []
    for exe in sorted(os.listdir(folder_path))
    if os.path.isfile(os.path.join(folder_path, exe))
}

# Loop through each executable
for exe in runtimes:
    exe_path = os.path.join(folder_path, exe)

    # Loop through each input size (the same list that is used as the x axis)
    for n in input_sizes:
        # Run the executable with the input size and capture its output
        result = subprocess.run([exe_path, str(n)], capture_output=True, text=True)
        if result.returncode != 0:
            raise SystemExit(
                f"{exe_path} {n} failed with exit code {result.returncode}:\n{result.stderr}"
            )

        # Parse the output to get the runtime
        runtime = float(result.stdout.strip())
        print(exe, runtime)

        # Append the runtime to the corresponding executable list
        runtimes[exe].append(runtime)

# Plot the data
plt.figure(figsize=(12, 6))

# Loop through each executable and plot the runtimes
for exe, times in runtimes.items():
    plt.plot(input_sizes, times, marker='o', label=exe)

plt.xlabel('Iterations')
plt.ylabel('Runtime (s)')
plt.title('Benchmark of Function Versions')
plt.legend()
plt.grid(True)
plt.tight_layout()

output_file = 'benchmark_plot.png'  # Specify your desired output file name and format
plt.savefig(output_file)
# The figure is only written to disk; no interactive window is opened.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#pragma once | ||
|
||
/*
 * Dense float matrix descriptor shared by the benchmark versions.
 * `data` is sized for rows * cols floats by the allocators in this project
 * (malloc in new_matrix, cudaMalloc in new_matrix_d).
 */
typedef struct {
    int rows, cols;  // dimensions
    float* data;     // element storage
} matrix;

// Allocates a matrix descriptor whose data buffer is obtained from cudaMalloc.
matrix* new_matrix_d(int rows, int cols);

// Runs the version's benchmark loop n times and returns the elapsed time in
// seconds as measured with clock().
double time(int n);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
#include "../template.cuh" | ||
|
||
/*
 * Allocates a rows x cols matrix whose data buffer lives in host memory
 * (malloc). The buffer contents are left uninitialized.
 *
 * Returns NULL if a dimension is negative or an allocation fails; nothing is
 * leaked in that case. Release with free(m->data) followed by free(m).
 */
matrix* new_matrix(int rows, int cols) {
    if (rows < 0 || cols < 0) {
        return NULL;
    }

    matrix* res = (matrix*)malloc(sizeof(matrix));
    if (res == NULL) {
        return NULL;
    }
    res->rows = rows;
    res->cols = cols;

    // Widen before multiplying so large dimensions cannot overflow int.
    size_t count = (size_t)rows * (size_t)cols;
    res->data = (float*)malloc(count * sizeof(float));
    // malloc(0) may legitimately return NULL, so only treat NULL as a
    // failure when elements were actually requested.
    if (res->data == NULL && count != 0) {
        free(res);
        return NULL;
    }
    return res;
}
|
||
/*
 * Allocates a rows x cols matrix whose data buffer is obtained from
 * cudaMalloc. The descriptor struct itself is allocated with malloc on the
 * host; only `data` points to device memory, which is left uninitialized.
 *
 * Returns NULL if a dimension is negative, the host allocation fails, or
 * cudaMalloc reports an error; nothing is leaked in that case. Release with
 * cudaFree(m->data) followed by free(m).
 */
matrix* new_matrix_d(int rows, int cols) {
    if (rows < 0 || cols < 0) {
        return NULL;
    }

    matrix* res = (matrix*)malloc(sizeof(matrix));
    if (res == NULL) {
        return NULL;
    }
    res->rows = rows;
    res->cols = cols;
    res->data = NULL;

    // Widen before multiplying so large dimensions cannot overflow int.
    size_t bytes = (size_t)rows * (size_t)cols * sizeof(float);
    if (cudaMalloc((void**)&(res->data), bytes) != cudaSuccess) {
        free(res);
        return NULL;
    }
    return res;
}
|
||
/*
 * In-place element-wise addition: a[i] += b[i].
 *
 * Each thread handles the single element at its flat 1D global index
 * (blockIdx.x * blockDim.x + threadIdx.x); threads whose index is not below
 * `rows` do nothing. There is no loop, so an element is only updated when
 * the launch contains a thread with that index.
 */
__global__ void matrix_add(float *a, float *b, int rows)
{
    const int gid = threadIdx.x + blockDim.x * blockIdx.x;
    if (gid >= rows) {
        return;  // past the end of the arrays
    }
    a[gid] += b[gid];
}
|
||
/*
 * Benchmarks n launches of the matrix_add kernel on two 100000 x 1 device
 * matrices and returns the elapsed time in seconds, measured with clock().
 *
 * Returns -1.0 if the matrices or the stream cannot be set up, or if the
 * launches or the final synchronization report an error.
 *
 * NOTE(review): clock() reports processor time of the host process, not
 * wall-clock time; kept so the numbers stay comparable with the CPU version.
 */
double time(int n) {
    int row = 100000;
    matrix* a = new_matrix_d(row, 1);
    matrix* b = new_matrix_d(row, 1);
    double seconds = -1.0;  // reported when anything below fails

    cudaStream_t stream1;
    if (a != NULL && b != NULL && cudaStreamCreate(&stream1) == cudaSuccess) {
        // Give the buffers defined contents, and finish that work before the
        // clock starts so it is not part of the measurement.
        size_t bytes = (size_t)row * sizeof(float);
        cudaMemsetAsync(a->data, 0, bytes, stream1);
        cudaMemsetAsync(b->data, 0, bytes, stream1);
        cudaStreamSynchronize(stream1);

        int thread = 1024;
        int block = (row + thread - 1) / thread;  // ceil-div: one thread per element

        clock_t start = clock();
        for (int i = 0; i < n; i++) {
            // Launch the full grid; a <<<1,1>>> launch would only ever
            // update element 0.
            matrix_add<<<block, thread, 0, stream1>>>(a->data, b->data, row);
        }
        cudaError_t launch_err = cudaGetLastError();
        // Kernel launches return before the work is done: wait for the stream
        // so the measured interval covers execution, not just enqueueing.
        cudaError_t sync_err = cudaStreamSynchronize(stream1);
        clock_t stop = clock();

        if (launch_err == cudaSuccess && sync_err == cudaSuccess) {
            seconds = (double)(stop - start) / CLOCKS_PER_SEC;
        }
        cudaStreamDestroy(stream1);
    }

    // Release everything that was allocated, whichever path was taken.
    if (a != NULL) {
        cudaFree(a->data);
        free(a);
    }
    if (b != NULL) {
        cudaFree(b->data);
        free(b);
    }
    return seconds;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#include "../template.cuh" | ||
|
||
/*
 * Allocates a rows x cols matrix whose data buffer lives in host memory
 * (malloc). The buffer contents are left uninitialized.
 *
 * Returns NULL if a dimension is negative or an allocation fails; nothing is
 * leaked in that case. Release with free(m->data) followed by free(m).
 */
matrix* new_matrix(int rows, int cols) {
    if (rows < 0 || cols < 0) {
        return NULL;
    }

    matrix* res = (matrix*)malloc(sizeof(matrix));
    if (res == NULL) {
        return NULL;
    }
    res->rows = rows;
    res->cols = cols;

    // Widen before multiplying so large dimensions cannot overflow int.
    size_t count = (size_t)rows * (size_t)cols;
    res->data = (float*)malloc(count * sizeof(float));
    // malloc(0) may legitimately return NULL, so only treat NULL as a
    // failure when elements were actually requested.
    if (res->data == NULL && count != 0) {
        free(res);
        return NULL;
    }
    return res;
}
|
||
/*
 * Allocates a rows x cols matrix whose data buffer is obtained from
 * cudaMalloc. The descriptor struct itself is allocated with malloc on the
 * host; only `data` points to device memory, which is left uninitialized.
 *
 * Returns NULL if a dimension is negative, the host allocation fails, or
 * cudaMalloc reports an error; nothing is leaked in that case. Release with
 * cudaFree(m->data) followed by free(m).
 */
matrix* new_matrix_d(int rows, int cols) {
    if (rows < 0 || cols < 0) {
        return NULL;
    }

    matrix* res = (matrix*)malloc(sizeof(matrix));
    if (res == NULL) {
        return NULL;
    }
    res->rows = rows;
    res->cols = cols;
    res->data = NULL;

    // Widen before multiplying so large dimensions cannot overflow int.
    size_t bytes = (size_t)rows * (size_t)cols * sizeof(float);
    if (cudaMalloc((void**)&(res->data), bytes) != cudaSuccess) {
        free(res);
        return NULL;
    }
    return res;
}
|
||
/*
 * In-place element-wise addition on the host: a[i] += b[i] for the first
 * `rows` elements, visited in increasing index order. Does nothing when
 * `rows` is zero or negative.
 */
void matrix_add(float* a, float* b, int rows) {
    float* dst = a;
    const float* src = b;
    for (int remaining = rows; remaining > 0; --remaining) {
        *dst += *src;
        ++dst;
        ++src;
    }
}
|
||
/*
 * Benchmarks n calls of the host matrix_add on two 100000 x 1 matrices and
 * returns the elapsed time in seconds, measured with clock().
 *
 * Returns -1.0 if the matrices cannot be allocated.
 */
double time(int n) {
    int row = 100000;
    matrix* a = new_matrix(row, 1);
    matrix* b = new_matrix(row, 1);
    double seconds = -1.0;  // reported when allocation fails

    if (a != NULL && b != NULL && a->data != NULL && b->data != NULL) {
        // new_matrix leaves the buffers uninitialized; give them defined
        // contents so the timed loop never reads indeterminate values.
        for (int i = 0; i < row; i++) {
            a->data[i] = 0.0f;
            b->data[i] = 0.0f;
        }

        clock_t start = clock();
        for (int i = 0; i < n; i++) {
            matrix_add(a->data, b->data, row);
        }
        seconds = (double)(clock() - start) / CLOCKS_PER_SEC;

        // Consume the result outside the timed region so the optimizer cannot
        // discard the additions now that the buffers are freed afterwards.
        float checksum = 0.0f;
        for (int i = 0; i < row; i++) {
            checksum += a->data[i];
        }
        volatile float sink = checksum;
        (void)sink;
    }

    // Release everything that was allocated, whichever path was taken.
    if (a != NULL) {
        free(a->data);
        free(a);
    }
    if (b != NULL) {
        free(b->data);
        free(b);
    }
    return seconds;
}
Oops, something went wrong.