Skip to content

Commit

Permalink
hip vector and matrix tests pass
Browse files Browse the repository at this point in the history
  • Loading branch information
kswirydo committed Oct 28, 2023
1 parent 8c60e7e commit ab0fc16
Show file tree
Hide file tree
Showing 12 changed files with 226 additions and 160 deletions.
13 changes: 7 additions & 6 deletions resolve/hip/hipKernels.hip
Original file line number Diff line number Diff line change
Expand Up @@ -103,22 +103,23 @@ __global__ void massAxpy3_kernel(int N,
const double* alpha) {

unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;

unsigned int t = threadIdx.x;

__shared__ double s_alpha[maxk];
if(t < k) {
s_alpha[t] = alpha[t];
}
__syncthreads();

if(i < N) {
double temp = 0.0f;
while (i < N){
double temp = 0.0;
for(int j = 0; j < k; ++j) {
temp += x_data[j * N + i] * s_alpha[j];
}
y_data[i] -= temp;
i += (blockDim.x*gridDim.x);
}
}

__global__ void matrixInfNormPart1(const int n,
const int nnz,
const int* a_ia,
Expand Down Expand Up @@ -153,7 +154,7 @@ void mass_inner_product_two_vectors(int n,
}
void mass_axpy(int n, int i, double* x, double* y, double* alpha)
{
hipLaunchKernelGGL(massAxpy3_kernel, dim3((n + 384 - 1) / 384), dim3(384), 0, 0, n, i + 1, x, y, alpha);
hipLaunchKernelGGL(massAxpy3_kernel, dim3((n + 384 - 1) / 384), dim3(384), 0, 0, n, i, x, y, alpha);
}

void matrix_row_sums(int n,
Expand All @@ -162,5 +163,5 @@ void matrix_row_sums(int n,
double* a_val,
double* result)
{
hipLaunchKernelGGL(matrixInfNormPart1,dim3(1000),dim3(1024), 0, 0, n, nnz, a_ia, a_val, result);
hipLaunchKernelGGL(matrixInfNormPart1,dim3(1000),dim3(1024), 0, 0, n, nnz, a_ia, a_val, result);
}
3 changes: 2 additions & 1 deletion resolve/hip/hipVectorKernels.hip
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ namespace kernels {
__global__ void set_const(index_type n, real_type val, real_type* arr)
{
index_type i = blockIdx.x * blockDim.x + threadIdx.x;
if(i < n)
while (i < n)
{
arr[i] = val;
i += blockDim.x * gridDim.x;
}
}
} // namespace kernels
Expand Down
30 changes: 15 additions & 15 deletions resolve/matrix/Coo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ namespace ReSolve
copyData("cpu");
return this->h_row_data_;
} else {
if (memspace == "cuda") {
copyData("cuda");
if ((memspace == "cuda") || (memspace == "hip")) {
copyData(memspace);
return this->d_row_data_;
} else {
return nullptr;
Expand All @@ -48,8 +48,8 @@ namespace ReSolve
copyData("cpu");
return this->h_col_data_;
} else {
if (memspace == "cuda") {
copyData("cuda");
if ((memspace == "cuda") || (memspace == "hip")) {
copyData(memspace);
return this->d_col_data_;
} else {
return nullptr;
Expand All @@ -63,8 +63,8 @@ namespace ReSolve
copyData("cpu");
return this->h_val_data_;
} else {
if (memspace == "cuda") {
copyData("cuda");
if ((memspace == "cuda") || (memspace == "hip")) {
copyData(memspace);
return this->d_val_data_;
} else {
return nullptr;
Expand All @@ -81,9 +81,9 @@ namespace ReSolve
setNotUpdated();
int control=-1;
if ((memspaceIn == "cpu") && (memspaceOut == "cpu")){ control = 0;}
if ((memspaceIn == "cpu") && (memspaceOut == "cuda")){ control = 1;}
if ((memspaceIn == "cuda") && (memspaceOut == "cpu")){ control = 2;}
if ((memspaceIn == "cuda") && (memspaceOut == "cuda")){ control = 3;}
if ((memspaceIn == "cpu") && ((memspaceOut == "cuda") || (memspaceOut == "hip"))){ control = 1;}
if (((memspaceIn == "cuda") || (memspaceIn == "hip")) && (memspaceOut == "cpu")){ control = 2;}
if (((memspaceIn == "cuda") || (memspaceIn == "hip")) && ((memspaceOut == "cuda") || (memspaceOut == "hip"))){ control = 3;}

if (memspaceOut == "cpu") {
//check if cpu data allocated
Expand All @@ -98,7 +98,7 @@ namespace ReSolve
}
}

if (memspaceOut == "cuda") {
if ((memspaceOut == "cuda") || (memspaceOut == "hip")) {
//check if cuda data allocated
if (d_row_data_ == nullptr) {
mem_.allocateArrayOnDevice(&d_row_data_, nnz_current);
Expand All @@ -120,23 +120,23 @@ namespace ReSolve
owns_cpu_data_ = true;
owns_cpu_vals_ = true;
break;
case 2://cuda->cpu
case 2://gpu->cpu
mem_.copyArrayDeviceToHost(h_row_data_, row_data, nnz_current);
mem_.copyArrayDeviceToHost(h_col_data_, col_data, nnz_current);
mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current);
h_data_updated_ = true;
owns_cpu_data_ = true;
owns_cpu_vals_ = true;
break;
case 1://cpu->cuda
case 1://cpu->gpu
mem_.copyArrayHostToDevice(d_row_data_, row_data, nnz_current);
mem_.copyArrayHostToDevice(d_col_data_, col_data, nnz_current);
mem_.copyArrayHostToDevice(d_val_data_, val_data, nnz_current);
d_data_updated_ = true;
owns_gpu_data_ = true;
owns_gpu_vals_ = true;
break;
case 3://cuda->cuda
case 3://gpu->gpua
mem_.copyArrayDeviceToDevice(d_row_data_, row_data, nnz_current);
mem_.copyArrayDeviceToDevice(d_col_data_, col_data, nnz_current);
mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current);
Expand Down Expand Up @@ -176,7 +176,7 @@ namespace ReSolve
return 0;
}

if (memspace == "cuda") {
if ((memspace == "cuda") || (memspace == "hip")) {
mem_.allocateArrayOnDevice(&d_row_data_, nnz_current);
mem_.allocateArrayOnDevice(&d_col_data_, nnz_current);
mem_.allocateArrayOnDevice(&d_val_data_, nnz_current);
Expand Down Expand Up @@ -215,7 +215,7 @@ namespace ReSolve
return 0;
}

if (memspaceOut == "cuda") {
if ((memspaceOut == "cuda") || (memspaceOut == "hip")) {
if ((d_data_updated_ == false) && (h_data_updated_ == true)) {
if (d_row_data_ == nullptr) {
mem_.allocateArrayOnDevice(&d_row_data_, nnz_current);
Expand Down
30 changes: 15 additions & 15 deletions resolve/matrix/Csc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ namespace ReSolve
copyData("cpu");
return this->h_row_data_;
} else {
if (memspace == "cuda") {
copyData("cuda");
if ((memspace == "cuda") || (memspace == "hip")) {
copyData(memspace);
return this->d_row_data_;
} else {
return nullptr;
Expand All @@ -45,8 +45,8 @@ namespace ReSolve
copyData("cpu");
return this->h_col_data_;
} else {
if (memspace == "cuda") {
copyData("cuda");
if ((memspace == "cuda") || (memspace == "hip")) {
copyData(memspace);
return this->d_col_data_;
} else {
return nullptr;
Expand All @@ -60,8 +60,8 @@ namespace ReSolve
copyData("cpu");
return this->h_val_data_;
} else {
if (memspace == "cuda") {
copyData("cuda");
if ((memspace == "cuda") || (memspace == "hip")) {
copyData(memspace);
return this->d_val_data_;
} else {
return nullptr;
Expand All @@ -77,9 +77,9 @@ namespace ReSolve
int control=-1;
setNotUpdated();
if ((memspaceIn == "cpu") && (memspaceOut == "cpu")){ control = 0;}
if ((memspaceIn == "cpu") && (memspaceOut == "cuda")){ control = 1;}
if ((memspaceIn == "cuda") && (memspaceOut == "cpu")){ control = 2;}
if ((memspaceIn == "cuda") && (memspaceOut == "cuda")){ control = 3;}
if ((memspaceIn == "cpu") && ((memspaceOut == "cuda") || (memspaceOut == "hip"))){ control = 1;}
if (((memspaceIn == "cuda") || (memspaceIn == "hip")) && (memspaceOut == "cpu")){ control = 2;}
if (((memspaceIn == "cuda") || (memspaceIn == "hip")) && ((memspaceOut == "cuda") || (memspaceOut == "hip"))){ control = 3;}

if (memspaceOut == "cpu") {
//check if cpu data allocated
Expand All @@ -94,7 +94,7 @@ namespace ReSolve
}
}

if (memspaceOut == "cuda") {
if ((memspaceOut == "cuda") || (memspaceOut == "hip")) {
//check if cuda data allocated
if (d_col_data_ == nullptr) {
mem_.allocateArrayOnDevice(&d_col_data_, n_ + 1);
Expand All @@ -116,23 +116,23 @@ namespace ReSolve
owns_cpu_data_ = true;
owns_cpu_vals_ = true;
break;
case 2://cuda->cpu
case 2://gpu->cpu
mem_.copyArrayDeviceToHost(h_col_data_, col_data, n_ + 1);
mem_.copyArrayDeviceToHost(h_row_data_, row_data, nnz_current);
mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current);
h_data_updated_ = true;
owns_cpu_data_ = true;
owns_cpu_vals_ = true;
break;
case 1://cpu->cuda
case 1://cpu->gpu
mem_.copyArrayHostToDevice(d_col_data_, col_data, n_ + 1);
mem_.copyArrayHostToDevice(d_row_data_, row_data, nnz_current);
mem_.copyArrayHostToDevice(d_val_data_, val_data, nnz_current);
d_data_updated_ = true;
owns_gpu_data_ = true;
owns_gpu_vals_ = true;
break;
case 3://cuda->cuda
case 3://gpu->gpu
mem_.copyArrayDeviceToDevice(d_col_data_, col_data, n_ + 1);
mem_.copyArrayDeviceToDevice(d_row_data_, row_data, nnz_current);
mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current);
Expand Down Expand Up @@ -173,7 +173,7 @@ namespace ReSolve
return 0;
}

if (memspace == "cuda") {
if ((memspace == "cuda") || (memspace == "hip")) {
mem_.allocateArrayOnDevice(&d_col_data_, n_ + 1);
mem_.allocateArrayOnDevice(&d_row_data_, nnz_current);
mem_.allocateArrayOnDevice(&d_val_data_, nnz_current);
Expand Down Expand Up @@ -212,7 +212,7 @@ namespace ReSolve
return 0;
}

if (memspaceOut == "cuda") {
if ((memspaceOut == "cuda") || (memspaceOut == "hip")) {
if ((d_data_updated_ == false) && (h_data_updated_ == true)) {
if (d_col_data_ == nullptr) {
mem_.allocateArrayOnDevice(&d_col_data_, n_ + 1);
Expand Down
30 changes: 15 additions & 15 deletions resolve/matrix/Csr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ namespace ReSolve
copyData("cpu");
return this->h_row_data_;
} else {
if (memspace == "cuda") {
copyData("cuda");
if ((memspace == "cuda") || (memspace == "hip")) {
copyData(memspace);
return this->d_row_data_;
} else {
return nullptr;
Expand All @@ -45,8 +45,8 @@ namespace ReSolve
copyData("cpu");
return this->h_col_data_;
} else {
if (memspace == "cuda") {
copyData("cuda");
if ((memspace == "cuda") || (memspace == "hip")) {
copyData(memspace);
return this->d_col_data_;
} else {
return nullptr;
Expand All @@ -60,8 +60,8 @@ namespace ReSolve
copyData("cpu");
return this->h_val_data_;
} else {
if (memspace == "cuda") {
copyData("cuda");
if ((memspace == "cuda") || (memspace == "hip")) {
copyData(memspace);
return this->d_val_data_;
} else {
return nullptr;
Expand All @@ -77,9 +77,9 @@ namespace ReSolve
setNotUpdated();
int control = -1;
if ((memspaceIn == "cpu") && (memspaceOut == "cpu")){ control = 0;}
if ((memspaceIn == "cpu") && (memspaceOut == "cuda")){ control = 1;}
if ((memspaceIn == "cuda") && (memspaceOut == "cpu")){ control = 2;}
if ((memspaceIn == "cuda") && (memspaceOut == "cuda")){ control = 3;}
if ((memspaceIn == "cpu") && ((memspaceOut == "cuda") || (memspaceOut == "hip"))){ control = 1;}
if (((memspaceIn == "cuda") || (memspaceIn == "hip")) && (memspaceOut == "cpu")){ control = 2;}
if (((memspaceIn == "cuda") || (memspaceIn == "hip")) && ((memspaceOut == "cuda") || (memspaceOut == "hip"))){ control = 3;}

if (memspaceOut == "cpu") {
//check if cpu data allocated
Expand All @@ -94,7 +94,7 @@ namespace ReSolve
}
}

if (memspaceOut == "cuda") {
if ((memspaceOut == "cuda") || (memspaceOut == "hip")) {
//check if cuda data allocated
if (d_row_data_ == nullptr) {
mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1);
Expand All @@ -118,23 +118,23 @@ namespace ReSolve
owns_cpu_data_ = true;
owns_cpu_vals_ = true;
break;
case 2://cuda->cpu
case 2://gpu->cpu
mem_.copyArrayDeviceToHost(h_row_data_, row_data, n_ + 1);
mem_.copyArrayDeviceToHost(h_col_data_, col_data, nnz_current);
mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current);
h_data_updated_ = true;
owns_cpu_data_ = true;
owns_cpu_vals_ = true;
break;
case 1://cpu->cuda
case 1://cpu->gpu
mem_.copyArrayHostToDevice(d_row_data_, row_data, n_ + 1);
mem_.copyArrayHostToDevice(d_col_data_, col_data, nnz_current);
mem_.copyArrayHostToDevice(d_val_data_, val_data, nnz_current);
d_data_updated_ = true;
owns_gpu_data_ = true;
owns_gpu_vals_ = true;
break;
case 3://cuda->cuda
case 3://gpu->gpu
mem_.copyArrayDeviceToDevice(d_row_data_, row_data, n_ + 1);
mem_.copyArrayDeviceToDevice(d_col_data_, col_data, nnz_current);
mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current);
Expand Down Expand Up @@ -174,7 +174,7 @@ namespace ReSolve
return 0;
}

if (memspace == "cuda") {
if ((memspace == "cuda") || (memspace == "hip")) {
mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1);
mem_.allocateArrayOnDevice(&d_col_data_, nnz_current);
mem_.allocateArrayOnDevice(&d_val_data_, nnz_current);
Expand Down Expand Up @@ -212,7 +212,7 @@ namespace ReSolve
return 0;
}

if (memspaceOut == "cuda") {
if ((memspaceOut == "cuda") || (memspaceOut == "hip")) {
if ((d_data_updated_ == false) && (h_data_updated_ == true)) {
if (d_row_data_ == nullptr) {
mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1);
Expand Down
Loading

0 comments on commit ab0fc16

Please sign in to comment.