diff --git a/cmake/ReSolveFindHipLibraries.cmake b/cmake/ReSolveFindHipLibraries.cmake
index 83b7c220..e754da0d 100644
--- a/cmake/ReSolveFindHipLibraries.cmake
+++ b/cmake/ReSolveFindHipLibraries.cmake
@@ -9,6 +9,8 @@ find_package(hipblas REQUIRED)
 target_link_libraries(resolve_hip INTERFACE
   #hip::host 
   hip::device
+  rocblas
+  rocsparse
   #roc::hipblas
 )
 
diff --git a/resolve/hip/CMakeLists.txt b/resolve/hip/CMakeLists.txt
index f0a93b04..f8d7a457 100644
--- a/resolve/hip/CMakeLists.txt
+++ b/resolve/hip/CMakeLists.txt
@@ -7,14 +7,14 @@
 ]]
 
 set(ReSolve_HIP_SRC
-    # hipKernels.cu
+    hipKernels.hip
     hipVectorKernels.hip
     MemoryUtils.hip
 )
 
 set(ReSolve_HIP_HEADER_INSTALL
     # hipKernels.h
-    # hipVectorKernels.h
+    hipVectorKernels.h
     HipMemory.hpp
     # hip_check_errors.hpp
 )
diff --git a/resolve/hip/hipKernels.h b/resolve/hip/hipKernels.h
new file mode 100644
index 00000000..9c48783a
--- /dev/null
+++ b/resolve/hip/hipKernels.h
@@ -0,0 +1,14 @@
+void mass_inner_product_two_vectors(int n, 
+                                    int i, 
+                                    double* vec1, 
+                                    double* vec2, 
+                                    double* mvec, 
+                                    double* result);
+void mass_axpy(int n, int i, double* x, double* y, double* alpha);
+
+//needed for matrix inf nrm
+void matrix_row_sums(int n, 
+                     int nnz, 
+                     int* a_ia,
+                     double* a_val, 
+                     double* result);
diff --git a/resolve/hip/hipKernels.hip b/resolve/hip/hipKernels.hip
new file mode 100644
index 00000000..13f53d85
--- /dev/null
+++ b/resolve/hip/hipKernels.hip
@@ -0,0 +1,167 @@
+#include "hipKernels.h"
+#define maxk 1024
+#define Tv5 1024
+
+#include <hip/hip_runtime.h>
+
+//computes V^T[u1 u2] where v is n x k and u1 and u2 are nx1
+__global__ void MassIPTwoVec_kernel(const double* __restrict__ u1, 
+                                    const double* __restrict__ u2, 
+                                    const double* __restrict__ v, 
+                                    double* result,
+                                    const int k, 
+                                    const int N)
+{
+  int t = threadIdx.x;
+  int bsize = blockDim.x;
+
+  // assume T threads per thread block (and k reductions to be performed)
+  volatile __shared__ double s_tmp1[Tv5];
+
+  volatile __shared__ double s_tmp2[Tv5];
+  // map between thread index space and the problem index space
+  int j = blockIdx.x;
+  s_tmp1[t] = 0.0f;
+  s_tmp2[t] = 0.0f;
+  int nn = t;
+  double can1, can2, cbn;
+
+  while(nn < N) {
+    can1 = u1[nn];
+    can2 = u2[nn];
+
+    cbn = v[N * j + nn];
+    s_tmp1[t] += can1 * cbn;
+    s_tmp2[t] += can2 * cbn;
+
+    nn += bsize;
+  }
+
+  __syncthreads();
+
+  if(Tv5 >= 1024) {
+    if(t < 512) {
+      s_tmp1[t] += s_tmp1[t + 512];
+      s_tmp2[t] += s_tmp2[t + 512];
+    }
+    __syncthreads();
+  }
+  if(Tv5 >= 512) {
+    if(t < 256) {
+      s_tmp1[t] += s_tmp1[t + 256];
+      s_tmp2[t] += s_tmp2[t + 256];
+    }
+    __syncthreads();
+  }
+  {
+    if(t < 128) {
+      s_tmp1[t] += s_tmp1[t + 128];
+      s_tmp2[t] += s_tmp2[t + 128];
+    }
+    __syncthreads();
+  }
+  {
+    if(t < 64) {
+      s_tmp1[t] += s_tmp1[t + 64];
+      s_tmp2[t] += s_tmp2[t + 64];
+    }
+    __syncthreads();
+  }
+
+  if(t < 32) {
+    s_tmp1[t] += s_tmp1[t + 32];
+    s_tmp2[t] += s_tmp2[t + 32];
+
+    s_tmp1[t] += s_tmp1[t + 16];
+    s_tmp2[t] += s_tmp2[t + 16];
+
+    s_tmp1[t] += s_tmp1[t + 8];
+    s_tmp2[t] += s_tmp2[t + 8];
+
+    s_tmp1[t] += s_tmp1[t + 4];
+    s_tmp2[t] += s_tmp2[t + 4];
+
+    s_tmp1[t] += s_tmp1[t + 2];
+    s_tmp2[t] += s_tmp2[t + 2];
+
+    s_tmp1[t] += s_tmp1[t + 1];
+    s_tmp2[t] += s_tmp2[t + 1];
+  }
+  if(t == 0) {
+    result[blockIdx.x] = s_tmp1[0];
+    result[blockIdx.x + k] = s_tmp2[0];
+  }
+}
+
+
+//mass AXPY i.e y = y - x*alpha where alpha is [k x 1], needed in 1 and 2 synch GMRES
+
+__global__ void massAxpy3_kernel(int N,
+                                 int k,
+                                 const double* x_data,
+                                 double* y_data,
+                                 const double* alpha) {
+
+  unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
+
+  unsigned int t = threadIdx.x;
+
+  __shared__ double s_alpha[maxk];
+  if(t < k) {
+    s_alpha[t] = alpha[t];
+  }
+  __syncthreads();
+  while (i < N){
+    double temp = 0.0;
+    for(int j = 0; j < k; ++j) {
+      temp += x_data[j * N + i] * s_alpha[j];
+    }
+    y_data[i] -= temp;
+    i += (blockDim.x*gridDim.x);
+  }
+}
+__global__ void matrixInfNormPart1(const int n, 
+                                   const int nnz, 
+                                   const int* a_ia,
+                                   const double* a_val, 
+                                   double* result) {
+
+  // one thread per row, pass through rows
+  // and sum
+  // can be done through atomics
+  //\sum_{j=1}^m abs(a_{ij})
+
+  int idx = blockIdx.x*blockDim.x + threadIdx.x;
+  while (idx < n){
+    double sum = 0.0f;
+    for (int i = a_ia[idx]; i < a_ia[idx+1]; ++i) {
+      sum = sum + fabs(a_val[i]);
+    }
+    result[idx] = sum;
+    idx += (blockDim.x*gridDim.x);
+  }
+}
+
+
+void mass_inner_product_two_vectors(int n, 
+                                    int i, 
+                                    double* vec1, 
+                                    double* vec2, 
+                                    double* mvec, 
+                                    double* result)
+{
+  hipLaunchKernelGGL(MassIPTwoVec_kernel, dim3(i + 1), dim3(1024), 0, 0, vec1, vec2, mvec, result, i + 1, n);
+}
+void mass_axpy(int n, int i, double* x, double* y, double* alpha)
+{
+  hipLaunchKernelGGL(massAxpy3_kernel, dim3((n + 384 - 1) / 384), dim3(384), 0, 0, n, i, x, y, alpha);
+}
+
+void matrix_row_sums(int n, 
+                     int nnz, 
+                     int* a_ia,
+                     double* a_val, 
+                     double* result)
+{
+  hipLaunchKernelGGL(matrixInfNormPart1,dim3(1000),dim3(1024), 0, 0, n, nnz, a_ia, a_val, result);
+}
diff --git a/resolve/hip/hipVectorKernels.hip b/resolve/hip/hipVectorKernels.hip
index 3df2b84b..f68cd0b9 100644
--- a/resolve/hip/hipVectorKernels.hip
+++ b/resolve/hip/hipVectorKernels.hip
@@ -1,29 +1,28 @@
 #include <resolve/Common.hpp>
 #include <resolve/vector/VectorKernels.hpp>
-
-#include "hipVectorKernels.h"
+#include <hip/hip_runtime.h>
 
 namespace ReSolve { namespace vector {
 
 namespace kernels {
 
-__global__ void set_const(index_type n, real_type val, real_type* arr)
-{
-  index_type i = blockIdx.x * blockDim.x + threadIdx.x;
-  if(i < n)
+  __global__ void set_const(index_type n, real_type val, real_type* arr)
   {
-    arr[i] = val;
+    index_type i = blockIdx.x * blockDim.x + threadIdx.x;
+    while (i < n)
+    {
+      arr[i] = val;
+      i += blockDim.x * gridDim.x;
+    }
   }
-}
-
 } // namespace kernels
 
-void set_array_const(index_type n, real_type val, real_type* arr)
+void set_array_const(index_type  n, real_type val, real_type* arr)
 {
-  index_type num_blocks;
-  index_type block_size = 512;
-  num_blocks = (n + block_size - 1) / block_size;
-  kernels::set_const<<<num_blocks, block_size>>>(n, val, arr);
+   index_type num_blocks;
+   index_type block_size = 512;
+   num_blocks = (n + block_size - 1) / block_size;
+   hipLaunchKernelGGL( kernels::set_const, dim3(num_blocks), dim3(block_size), 0, 0, n, val, arr);
 }
 
-}} // namespace ReSolve::vector
\ No newline at end of file
+}} // namespace ReSolve::vector
diff --git a/resolve/matrix/CMakeLists.txt b/resolve/matrix/CMakeLists.txt
index 554c0ba7..565fa7c9 100644
--- a/resolve/matrix/CMakeLists.txt
+++ b/resolve/matrix/CMakeLists.txt
@@ -22,6 +22,11 @@ set(Matrix_CUDASDK_SRC
     MatrixHandlerCuda.cpp
 )
 
+# and on HIP
+set(Matrix_ROCM_SRC 
+  MatrixHandlerHip.cpp
+)
+
 # Header files to be installed
 set(Matrix_HEADER_INSTALL
     io.hpp
@@ -37,6 +42,10 @@ if(RESOLVE_USE_CUDA)
   set(Matrix_SRC ${Matrix_SRC} ${Matrix_CUDASDK_SRC})
 endif()
 
+if(RESOLVE_USE_HIP)
+  set(Matrix_SRC ${Matrix_SRC} ${Matrix_ROCM_SRC})
+endif()
+
 
 # Build shared library ReSolve::matrix
 add_library(resolve_matrix SHARED ${Matrix_SRC})
@@ -47,6 +56,10 @@ if (RESOLVE_USE_CUDA)
   target_link_libraries(resolve_matrix PUBLIC resolve_backend_cuda)
 endif()
 
+if (RESOLVE_USE_HIP)
+  target_link_libraries(resolve_matrix PUBLIC resolve_backend_hip)
+endif()
+
 # Link to dummy device backend if GPU support is not enabled
 if (NOT RESOLVE_USE_GPU)
   target_link_libraries(resolve_matrix PUBLIC resolve_backend_cpu)
diff --git a/resolve/matrix/Coo.cpp b/resolve/matrix/Coo.cpp
index c8caebf6..a91f94a9 100644
--- a/resolve/matrix/Coo.cpp
+++ b/resolve/matrix/Coo.cpp
@@ -33,8 +33,8 @@ namespace ReSolve
       copyData("cpu");
       return this->h_row_data_;
     } else {
-      if (memspace == "cuda") {
-        copyData("cuda");
+      if ((memspace == "cuda") || (memspace == "hip")) {
+        copyData(memspace);
         return this->d_row_data_;
       } else {
         return nullptr;
@@ -48,8 +48,8 @@ namespace ReSolve
       copyData("cpu");
       return this->h_col_data_;
     } else {
-      if (memspace == "cuda") {
-        copyData("cuda");
+      if ((memspace == "cuda") || (memspace == "hip")) {
+        copyData(memspace);
         return this->d_col_data_;
       } else {
         return nullptr;
@@ -63,8 +63,8 @@ namespace ReSolve
       copyData("cpu");
       return this->h_val_data_;
     } else {
-      if (memspace == "cuda") {
-        copyData("cuda");
+      if ((memspace == "cuda") || (memspace == "hip")) {
+        copyData(memspace);
         return this->d_val_data_;
       } else {
         return nullptr;
@@ -81,9 +81,9 @@ namespace ReSolve
     setNotUpdated();
     int control=-1;
     if ((memspaceIn == "cpu") && (memspaceOut == "cpu")){ control = 0;}
-    if ((memspaceIn == "cpu") && (memspaceOut == "cuda")){ control = 1;}
-    if ((memspaceIn == "cuda") && (memspaceOut == "cpu")){ control = 2;}
-    if ((memspaceIn == "cuda") && (memspaceOut == "cuda")){ control = 3;}
+    if ((memspaceIn == "cpu") && ((memspaceOut == "cuda") || (memspaceOut == "hip"))){ control = 1;}
+    if (((memspaceIn == "cuda") || (memspaceIn == "hip")) && (memspaceOut == "cpu")){ control = 2;}
+    if (((memspaceIn == "cuda") || (memspaceIn == "hip")) && ((memspaceOut == "cuda") || (memspaceOut == "hip"))){ control = 3;}
 
     if (memspaceOut == "cpu") {
       //check if cpu data allocated	
@@ -98,7 +98,7 @@ namespace ReSolve
       }
     }
 
-    if (memspaceOut == "cuda") {
+    if ((memspaceOut == "cuda") || (memspaceOut == "hip")) {
       //check if cuda data allocated
       if (d_row_data_ == nullptr) {
         mem_.allocateArrayOnDevice(&d_row_data_, nnz_current);
@@ -120,7 +120,7 @@ namespace ReSolve
         owns_cpu_data_ = true;
         owns_cpu_vals_ = true;
         break;
-      case 2://cuda->cpu
+      case 2://gpu->cpu
         mem_.copyArrayDeviceToHost(h_row_data_, row_data, nnz_current);
         mem_.copyArrayDeviceToHost(h_col_data_, col_data, nnz_current);
         mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current);
@@ -128,7 +128,7 @@ namespace ReSolve
         owns_cpu_data_ = true;
         owns_cpu_vals_ = true;
         break;
-      case 1://cpu->cuda
+      case 1://cpu->gpu
         mem_.copyArrayHostToDevice(d_row_data_, row_data, nnz_current);
         mem_.copyArrayHostToDevice(d_col_data_, col_data, nnz_current);
         mem_.copyArrayHostToDevice(d_val_data_, val_data, nnz_current);
@@ -136,7 +136,7 @@ namespace ReSolve
         owns_gpu_data_ = true;
         owns_gpu_vals_ = true;
         break;
-      case 3://cuda->cuda
+      case 3://gpu->gpua
         mem_.copyArrayDeviceToDevice(d_row_data_, row_data, nnz_current);
         mem_.copyArrayDeviceToDevice(d_col_data_, col_data, nnz_current);
         mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current);
@@ -176,7 +176,7 @@ namespace ReSolve
       return 0;
     }
 
-    if (memspace == "cuda") {
+    if ((memspace == "cuda") || (memspace == "hip")) {
       mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); 
       mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); 
       mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); 
@@ -215,7 +215,7 @@ namespace ReSolve
       return 0;
     }
 
-    if (memspaceOut == "cuda") {
+    if ((memspaceOut == "cuda") || (memspaceOut == "hip")) {
       if ((d_data_updated_ == false) && (h_data_updated_ == true)) {
         if (d_row_data_ == nullptr) {
           mem_.allocateArrayOnDevice(&d_row_data_, nnz_current);
diff --git a/resolve/matrix/Csc.cpp b/resolve/matrix/Csc.cpp
index 1a305e03..e2ea765f 100644
--- a/resolve/matrix/Csc.cpp
+++ b/resolve/matrix/Csc.cpp
@@ -30,8 +30,8 @@ namespace ReSolve
       copyData("cpu");
       return this->h_row_data_;
     } else {
-      if (memspace == "cuda") {
-        copyData("cuda");
+      if ((memspace == "cuda") || (memspace == "hip")) {
+        copyData(memspace);
         return this->d_row_data_;
       } else {
         return nullptr;
@@ -45,8 +45,8 @@ namespace ReSolve
       copyData("cpu");
       return this->h_col_data_;
     } else {
-      if (memspace == "cuda") {
-        copyData("cuda");
+      if ((memspace == "cuda") || (memspace == "hip")) {
+        copyData(memspace);
         return this->d_col_data_;
       } else {
         return nullptr;
@@ -60,8 +60,8 @@ namespace ReSolve
       copyData("cpu");
       return this->h_val_data_;
     } else {
-      if (memspace == "cuda") {
-        copyData("cuda");
+      if ((memspace == "cuda") || (memspace == "hip")) {
+        copyData(memspace);
         return this->d_val_data_;
       } else {
         return nullptr;
@@ -77,9 +77,9 @@ namespace ReSolve
     int control=-1;
     setNotUpdated();
     if ((memspaceIn == "cpu") && (memspaceOut == "cpu")){ control = 0;}
-    if ((memspaceIn == "cpu") && (memspaceOut == "cuda")){ control = 1;}
-    if ((memspaceIn == "cuda") && (memspaceOut == "cpu")){ control = 2;}
-    if ((memspaceIn == "cuda") && (memspaceOut == "cuda")){ control = 3;}
+    if ((memspaceIn == "cpu") && ((memspaceOut == "cuda") || (memspaceOut == "hip"))){ control = 1;}
+    if (((memspaceIn == "cuda") || (memspaceIn == "hip")) && (memspaceOut == "cpu")){ control = 2;}
+    if (((memspaceIn == "cuda") || (memspaceIn == "hip")) && ((memspaceOut == "cuda") || (memspaceOut == "hip"))){ control = 3;}
 
     if (memspaceOut == "cpu") {
       //check if cpu data allocated
@@ -94,7 +94,7 @@ namespace ReSolve
       }
     }
 
-    if (memspaceOut == "cuda") {
+    if ((memspaceOut == "cuda") || (memspaceOut == "hip")) {
       //check if cuda data allocated
       if (d_col_data_ == nullptr) {
         mem_.allocateArrayOnDevice(&d_col_data_, n_ + 1); 
@@ -116,7 +116,7 @@ namespace ReSolve
         owns_cpu_data_ = true;
         owns_cpu_vals_ = true;
         break;
-      case 2://cuda->cpu
+      case 2://gpu->cpu
         mem_.copyArrayDeviceToHost(h_col_data_, col_data,      n_ + 1);
         mem_.copyArrayDeviceToHost(h_row_data_, row_data, nnz_current);
         mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current);
@@ -124,7 +124,7 @@ namespace ReSolve
         owns_cpu_data_ = true;
         owns_cpu_vals_ = true;
         break;
-      case 1://cpu->cuda
+      case 1://cpu->gpu
         mem_.copyArrayHostToDevice(d_col_data_, col_data,      n_ + 1);
         mem_.copyArrayHostToDevice(d_row_data_, row_data, nnz_current);
         mem_.copyArrayHostToDevice(d_val_data_, val_data, nnz_current);
@@ -132,7 +132,7 @@ namespace ReSolve
         owns_gpu_data_ = true;
         owns_gpu_vals_ = true;
         break;
-      case 3://cuda->cuda
+      case 3://gpu->gpu
         mem_.copyArrayDeviceToDevice(d_col_data_, col_data,      n_ + 1);
         mem_.copyArrayDeviceToDevice(d_row_data_, row_data, nnz_current);
         mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current);
@@ -173,7 +173,7 @@ namespace ReSolve
       return 0;
     }
 
-    if (memspace == "cuda") {
+    if ((memspace == "cuda") || (memspace == "hip")) {
       mem_.allocateArrayOnDevice(&d_col_data_,      n_ + 1); 
       mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); 
       mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); 
@@ -212,7 +212,7 @@ namespace ReSolve
       return 0;   
     }
 
-    if (memspaceOut == "cuda") {
+    if ((memspaceOut == "cuda") || (memspaceOut == "hip")) {
       if ((d_data_updated_ == false) && (h_data_updated_ == true)) {
         if (d_col_data_ == nullptr) {
           mem_.allocateArrayOnDevice(&d_col_data_, n_ + 1); 
diff --git a/resolve/matrix/Csr.cpp b/resolve/matrix/Csr.cpp
index f1ddd31f..dff33b48 100644
--- a/resolve/matrix/Csr.cpp
+++ b/resolve/matrix/Csr.cpp
@@ -30,8 +30,8 @@ namespace ReSolve
       copyData("cpu");
       return this->h_row_data_;
     } else {
-      if (memspace == "cuda") {
-        copyData("cuda");
+      if ((memspace == "cuda") || (memspace == "hip")) {
+        copyData(memspace);
         return this->d_row_data_;
       } else {
         return nullptr;
@@ -45,8 +45,8 @@ namespace ReSolve
       copyData("cpu");
       return this->h_col_data_;
     } else {
-      if (memspace == "cuda") {
-        copyData("cuda");
+      if ((memspace == "cuda") || (memspace == "hip")) {
+        copyData(memspace);
         return this->d_col_data_;
       } else {
         return nullptr;
@@ -60,8 +60,8 @@ namespace ReSolve
       copyData("cpu");
       return this->h_val_data_;
     } else {
-      if (memspace == "cuda") {
-        copyData("cuda");
+      if ((memspace == "cuda") || (memspace == "hip")) {
+        copyData(memspace);
         return this->d_val_data_;
       } else {
         return nullptr;
@@ -77,9 +77,9 @@ namespace ReSolve
     setNotUpdated();
     int control = -1;
     if ((memspaceIn == "cpu") && (memspaceOut == "cpu")){ control = 0;}
-    if ((memspaceIn == "cpu") && (memspaceOut == "cuda")){ control = 1;}
-    if ((memspaceIn == "cuda") && (memspaceOut == "cpu")){ control = 2;}
-    if ((memspaceIn == "cuda") && (memspaceOut == "cuda")){ control = 3;}
+    if ((memspaceIn == "cpu") && ((memspaceOut == "cuda") || (memspaceOut == "hip"))){ control = 1;}
+    if (((memspaceIn == "cuda") || (memspaceIn == "hip")) && (memspaceOut == "cpu")){ control = 2;}
+    if (((memspaceIn == "cuda") || (memspaceIn == "hip")) && ((memspaceOut == "cuda") || (memspaceOut == "hip"))){ control = 3;}
 
     if (memspaceOut == "cpu") {
       //check if cpu data allocated
@@ -94,7 +94,7 @@ namespace ReSolve
       }
     }
 
-    if (memspaceOut == "cuda") {
+    if ((memspaceOut == "cuda") || (memspaceOut == "hip")) {
       //check if cuda data allocated
       if (d_row_data_ == nullptr) {
         mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); 
@@ -118,7 +118,7 @@ namespace ReSolve
         owns_cpu_data_ = true;
         owns_cpu_vals_ = true;
         break;
-      case 2://cuda->cpu
+      case 2://gpu->cpu
         mem_.copyArrayDeviceToHost(h_row_data_, row_data,      n_ + 1);
         mem_.copyArrayDeviceToHost(h_col_data_, col_data, nnz_current);
         mem_.copyArrayDeviceToHost(h_val_data_, val_data, nnz_current);
@@ -126,7 +126,7 @@ namespace ReSolve
         owns_cpu_data_ = true;
         owns_cpu_vals_ = true;
         break;
-      case 1://cpu->cuda
+      case 1://cpu->gpu
         mem_.copyArrayHostToDevice(d_row_data_, row_data,      n_ + 1);
         mem_.copyArrayHostToDevice(d_col_data_, col_data, nnz_current);
         mem_.copyArrayHostToDevice(d_val_data_, val_data, nnz_current);
@@ -134,7 +134,7 @@ namespace ReSolve
         owns_gpu_data_ = true;
         owns_gpu_vals_ = true;
         break;
-      case 3://cuda->cuda
+      case 3://gpu->gpu
         mem_.copyArrayDeviceToDevice(d_row_data_, row_data,      n_ + 1);
         mem_.copyArrayDeviceToDevice(d_col_data_, col_data, nnz_current);
         mem_.copyArrayDeviceToDevice(d_val_data_, val_data, nnz_current);
@@ -174,7 +174,7 @@ namespace ReSolve
       return 0;   
     }
 
-    if (memspace == "cuda") {
+    if ((memspace == "cuda") || (memspace == "hip")) {
       mem_.allocateArrayOnDevice(&d_row_data_,      n_ + 1); 
       mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); 
       mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); 
@@ -212,7 +212,7 @@ namespace ReSolve
       return 0;
     }
 
-    if (memspaceOut == "cuda") {
+    if ((memspaceOut == "cuda") || (memspaceOut == "hip")) {
       if ((d_data_updated_ == false) && (h_data_updated_ == true)) {
         if (d_row_data_ == nullptr) {
           mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); 
diff --git a/resolve/matrix/MatrixHandler.cpp b/resolve/matrix/MatrixHandler.cpp
index 8bf4302c..133a09f9 100644
--- a/resolve/matrix/MatrixHandler.cpp
+++ b/resolve/matrix/MatrixHandler.cpp
@@ -13,6 +13,9 @@
 #ifdef RESOLVE_USE_CUDA
 #include "MatrixHandlerCuda.hpp"
 #endif
+#ifdef RESOLVE_USE_HIP
+#include "MatrixHandlerHip.hpp"
+#endif
 
 namespace ReSolve {
   // Create a shortcut name for Logger static class
@@ -41,6 +44,7 @@ namespace ReSolve {
   {
     if (isCpuEnabled_)  delete cpuImpl_;
     if (isCudaEnabled_) delete cudaImpl_;
+    if (isHipEnabled_) delete hipImpl_;
   }
 
   /**
@@ -74,12 +78,31 @@ namespace ReSolve {
   }
 #endif
 
+#ifdef RESOLVE_USE_HIP
+  /**
+   * @brief Constructor taking pointer to the CUDA workspace as its parameter.
+   * 
+   * @post A CPU implementation instance is created because it is cheap and
+   * it does not require a workspace.
+   * 
+   * @post A HIP implementation instance is created with supplied workspace.
+   */
+  MatrixHandler::MatrixHandler(LinAlgWorkspaceHIP* new_workspace)
+  {
+    cpuImpl_  = new MatrixHandlerCpu();
+    hipImpl_ = new MatrixHandlerHip(new_workspace);
+    isCpuEnabled_  = true;
+    isHipEnabled_ = true;
+  }
+#endif
   void MatrixHandler::setValuesChanged(bool isValuesChanged, std::string memspace)
   {
     if (memspace == "cpu") {
       cpuImpl_->setValuesChanged(isValuesChanged);
     } else if (memspace == "cuda") {
       cudaImpl_->setValuesChanged(isValuesChanged);
+    } else if (memspace == "hip") {
+      hipImpl_->setValuesChanged(isValuesChanged);
     } else {
       out::error() << "Unsupported device " << memspace << "\n";
     }
@@ -230,6 +253,8 @@ namespace ReSolve {
     } else {
       if (memspace == "cuda"){      
         A_csr->updateData(csr_ia, csr_ja, csr_a, "cpu", "cuda");
+      } else if (memspace == "hip"){      
+        A_csr->updateData(csr_ia, csr_ja, csr_a, "cpu", "cuda");
       } else {
         //display error
       }
@@ -269,6 +294,9 @@ namespace ReSolve {
       return cudaImpl_->matvec(A, vec_x, vec_result, alpha, beta, matrixFormat);
     } else if (memspace == "cpu") {
         return cpuImpl_->matvec(A, vec_x, vec_result, alpha, beta, matrixFormat);
+    } else if (memspace == "hip") {
+      printf("about to run mv");
+        return hipImpl_->matvec(A, vec_x, vec_result, alpha, beta, matrixFormat);
     } else {
         out::error() << "Support for device " << memspace << " not implemented (yet)" << std::endl;
         return 1;
@@ -280,6 +308,8 @@ namespace ReSolve {
   {
     if (memspace == "cuda") { 
       return cudaImpl_->csc2csr(A_csc, A_csr);
+    } else if (memspace == "hip") {
+      return hipImpl_->csc2csr(A_csc, A_csr);
     } else if (memspace == "cpu") { 
       out::warning() << "Using untested csc2csr on CPU ..." << std::endl;
       return cpuImpl_->csc2csr(A_csc, A_csr);
diff --git a/resolve/matrix/MatrixHandler.hpp b/resolve/matrix/MatrixHandler.hpp
index 398a8039..cec61085 100644
--- a/resolve/matrix/MatrixHandler.hpp
+++ b/resolve/matrix/MatrixHandler.hpp
@@ -18,6 +18,7 @@ namespace ReSolve
   }
   class LinAlgWorkspaceCpu;
   class LinAlgWorkspaceCUDA;
+  class LinAlgWorkspaceHIP;
   class MatrixHandlerImpl;
 }
 
@@ -48,6 +49,7 @@ namespace ReSolve {
       MatrixHandler();
       MatrixHandler(LinAlgWorkspaceCpu* workspace);
       MatrixHandler(LinAlgWorkspaceCUDA* workspace);
+      MatrixHandler(LinAlgWorkspaceHIP* workspace);
       ~MatrixHandler();
 
       int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace);
@@ -70,9 +72,11 @@ namespace ReSolve {
       MemoryHandler mem_;      ///< Device memory manager object
       MatrixHandlerImpl*  cpuImpl_{nullptr}; ///< Pointer to CPU implementation
       MatrixHandlerImpl* cudaImpl_{nullptr}; ///< Pointer to CUDA implementation
+      MatrixHandlerImpl* hipImpl_{nullptr}; ///< Pointer to HIP implementation
 
       bool isCpuEnabled_{false};  ///< true if CPU  implementation is instantiated
       bool isCudaEnabled_{false}; ///< true if CUDA implementation is instantiated
+      bool isHipEnabled_{false}; ///< true if HIP implementation is instantiated
   };
 
 } // namespace ReSolve
diff --git a/resolve/matrix/MatrixHandlerHip.cpp b/resolve/matrix/MatrixHandlerHip.cpp
new file mode 100644
index 00000000..370849fa
--- /dev/null
+++ b/resolve/matrix/MatrixHandlerHip.cpp
@@ -0,0 +1,154 @@
+#include <algorithm>
+
+#include <resolve/utilities/logger/Logger.hpp>
+#include <resolve/vector/Vector.hpp>
+#include <resolve/matrix/Coo.hpp>
+#include <resolve/matrix/Csc.hpp>
+#include <resolve/matrix/Csr.hpp>
+#include <resolve/workspace/LinAlgWorkspaceHIP.hpp>
+#include "MatrixHandlerHip.hpp"
+
+namespace ReSolve {
+  // Create a shortcut name for Logger static class
+  using out = io::Logger;
+
+  MatrixHandlerHip::~MatrixHandlerHip()
+  {
+  }
+
+  MatrixHandlerHip::MatrixHandlerHip(LinAlgWorkspaceHIP* new_workspace)
+  {
+    workspace_ = new_workspace;
+  }
+
+  void MatrixHandlerHip::setValuesChanged(bool values_changed)
+  {
+    values_changed_ = values_changed;
+  }
+
+
+  int MatrixHandlerHip::matvec(matrix::Sparse* Ageneric, 
+                               vector_type* vec_x, 
+                               vector_type* vec_result, 
+                               const real_type* alpha, 
+                               const real_type* beta,
+                               std::string matrixFormat) 
+  {
+    using namespace constants;
+    int error_sum = 0;
+    if (matrixFormat == "csr") {
+      matrix::Csr* A = dynamic_cast<matrix::Csr*>(Ageneric);
+      //result = alpha *A*x + beta * result
+      rocsparse_status status;
+      LinAlgWorkspaceHIP* workspaceHIP = workspace_;
+
+      rocsparse_handle handle_rocsparse = workspaceHIP->getRocsparseHandle();
+      
+      rocsparse_mat_info infoA = workspaceHIP->getSpmvMatrixInfo();
+      rocsparse_mat_descr descrA =  workspaceHIP->getSpmvMatrixDescriptor();
+      
+      if (!workspaceHIP->matvecSetup()) {
+        //setup first, allocate, etc.
+        
+        rocsparse_create_mat_descr(&(descrA));
+        rocsparse_set_mat_index_base(descrA, rocsparse_index_base_zero);
+        rocsparse_set_mat_type(descrA, rocsparse_matrix_type_general);
+
+        rocsparse_create_mat_info(&infoA);
+        
+        status = rocsparse_dcsrmv_analysis(handle_rocsparse,
+                                           rocsparse_operation_none,
+                                           A->getNumRows(),
+                                           A->getNumColumns(),
+                                           A->getNnzExpanded(), 
+                                           descrA,
+                                           A->getValues("cuda"), 
+                                           A->getRowData("cuda"),
+                                           A->getColData("cuda"), // cuda is used as "device"
+                                           infoA);
+        error_sum += status;
+        mem_.deviceSynchronize();
+
+        workspaceHIP->matvecSetupDone();
+      } 
+      
+      status = rocsparse_dcsrmv(handle_rocsparse,
+                                rocsparse_operation_none,
+                                A->getNumRows(),
+                                A->getNumColumns(),
+                                A->getNnzExpanded(),
+                                alpha, 
+                                descrA,
+                                A->getValues("cuda"), 
+                                A->getRowData("cuda"),
+                                A->getColData("cuda"),
+                                infoA,
+                                vec_x->getData("cuda"),
+                                beta,
+                                vec_result->getData("cuda"));
+
+      error_sum += status;
+      mem_.deviceSynchronize();
+      if (status)
+        out::error() << "Matvec status: " << status 
+                      << "Last error code: " << mem_.getLastDeviceError() << std::endl;
+      vec_result->setDataUpdated("cuda");
+
+      return error_sum;
+    } else {
+      out::error() << "MatVec not implemented (yet) for " 
+                   << matrixFormat << " matrix format." << std::endl;
+      return 1;
+    }
+  }
+
+  int MatrixHandlerHip::Matrix1Norm(matrix::Sparse* /* A */, real_type* /* norm */)
+  {
+    return -1;
+  }
+
+  int MatrixHandlerHip::csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr)
+  {
+    index_type error_sum = 0;
+    LinAlgWorkspaceHIP* workspaceHIP = (LinAlgWorkspaceHIP*) workspace_;
+
+    rocsparse_status status;
+    
+    A_csr->allocateMatrixData("cuda");
+    index_type n = A_csc->getNumRows();
+    index_type m = A_csc->getNumRows();
+    index_type nnz = A_csc->getNnz();
+    size_t bufferSize;
+    void* d_work;
+
+    status = rocsparse_csr2csc_buffer_size(workspaceHIP->getRocsparseHandle(),
+                                           n,
+                                           m,
+                                           nnz,
+                                           A_csc->getColData("cuda"), 
+                                           A_csc->getRowData("cuda"), 
+                                           rocsparse_action_numeric,
+                                           &bufferSize);
+
+    error_sum += status;
+    mem_.allocateBufferOnDevice(&d_work, bufferSize);
+    
+    status = rocsparse_dcsr2csc(workspaceHIP->getRocsparseHandle(),
+                                n,
+                                m,
+                                nnz,
+                                A_csc->getValues("cuda"), 
+                                A_csc->getColData("cuda"), 
+                                A_csc->getRowData("cuda"), 
+                                A_csr->getValues("cuda"), 
+                                A_csr->getRowData("cuda"),
+                                A_csr->getColData("cuda"), 
+                                rocsparse_action_numeric,
+                                rocsparse_index_base_zero,
+                                d_work);
+    error_sum += status;
+    return error_sum;
+    mem_.deleteOnDevice(d_work);
+  }
+
+} // namespace ReSolve
diff --git a/resolve/matrix/MatrixHandlerHip.hpp b/resolve/matrix/MatrixHandlerHip.hpp
new file mode 100644
index 00000000..7f06f3bd
--- /dev/null
+++ b/resolve/matrix/MatrixHandlerHip.hpp
@@ -0,0 +1,60 @@
+#pragma once
+#include <resolve/Common.hpp>
+#include <resolve/MemoryUtils.hpp>
+#include <resolve/matrix/MatrixHandlerImpl.hpp>
+
+namespace ReSolve
+{ 
+  namespace vector
+  {
+    class Vector;
+  }
+  namespace matrix
+  {
+    class Sparse;
+    class Coo;
+    class Csc;
+    class Csr;
+  }
+  class LinAlgWorkspaceHIP;
+}
+
+
+namespace ReSolve {
+  /**
+   * @class MatrixHandlerHip
+   * 
+   * @brief HIP implementation of the matrix handler.
+   */
+  class MatrixHandlerHip : public MatrixHandlerImpl
+  {
+    using vector_type = vector::Vector;
+    
+    public:
+      
+      MatrixHandlerHip(LinAlgWorkspaceHIP* workspace);
+      virtual ~MatrixHandlerHip();
+
+      int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr);
+      
+      virtual int matvec(matrix::Sparse* A,
+                 vector_type* vec_x,
+                 vector_type* vec_result,
+                 const real_type* alpha,
+                 const real_type* beta,
+                 std::string matrix_type);
+      
+      virtual int Matrix1Norm(matrix::Sparse *A, real_type* norm);
+      
+      void setValuesChanged(bool isValuesChanged); 
+    
+    private: 
+      
+      LinAlgWorkspaceHIP* workspace_{nullptr};
+      bool values_changed_{true}; ///< needed for matvec
+
+      MemoryHandler mem_; ///< Device memory manager object
+  };
+
+} // namespace ReSolve
+
diff --git a/resolve/vector/CMakeLists.txt b/resolve/vector/CMakeLists.txt
index 16d53010..89b1abc8 100644
--- a/resolve/vector/CMakeLists.txt
+++ b/resolve/vector/CMakeLists.txt
@@ -18,6 +18,13 @@ set(Vector_CUDASDK_SRC
     VectorHandlerCuda.cpp
 )
 
+#and hip
+
+set(Vector_ROCM_SRC 
+  VectorHandlerHip.cpp
+)
+
+
 # Header files to be installed
 set(Vector_HEADER_INSTALL
     Vector.hpp
@@ -30,6 +37,11 @@ if(RESOLVE_USE_CUDA)
   set(Vector_SRC ${Vector_SRC} ${Vector_CUDASDK_SRC})
 endif()
 
+# and hip
+if(RESOLVE_USE_HIP)
+  set(Vector_SRC ${Vector_SRC} ${Vector_ROCM_SRC})
+endif()
+
 add_library(resolve_vector SHARED ${Vector_SRC})
 target_link_libraries(resolve_vector PRIVATE resolve_logger)
 
@@ -38,6 +50,10 @@ if (RESOLVE_USE_CUDA)
   target_link_libraries(resolve_vector PUBLIC resolve_backend_cuda)
 endif()
 
+if (RESOLVE_USE_HIP)
+  target_link_libraries(resolve_vector PUBLIC resolve_backend_hip)
+endif()
+
 # If no GPU is enabled link to dummy device backend
 if(NOT RESOLVE_USE_GPU)
   target_link_libraries(resolve_vector PUBLIC resolve_backend_cpu)
diff --git a/resolve/vector/Vector.cpp b/resolve/vector/Vector.cpp
index 7934e8b0..37779ea5 100644
--- a/resolve/vector/Vector.cpp
+++ b/resolve/vector/Vector.cpp
@@ -60,7 +60,7 @@ namespace ReSolve { namespace vector {
       cpu_updated_ = true;
       gpu_updated_ = false;
     } else {
-      if (memspace == "cuda") { 
+      if ((memspace == "cuda") || (memspace == "hip")) {
         d_data_ = data;
         gpu_updated_ = true;
         cpu_updated_ = false;
@@ -76,7 +76,7 @@ namespace ReSolve { namespace vector {
       cpu_updated_ = true;
       gpu_updated_ = false;
     } else {
-      if (memspace == "cuda") { 
+      if ((memspace == "cuda") || (memspace == "hip")) {
         gpu_updated_ = true;
         cpu_updated_ = false;
       } else {
@@ -89,15 +89,15 @@ namespace ReSolve { namespace vector {
   {
     int control=-1;
     if ((memspaceIn == "cpu") && (memspaceOut == "cpu")){ control = 0;}
-    if ((memspaceIn == "cpu") && (memspaceOut == "cuda")){ control = 1;}
-    if ((memspaceIn == "cuda") && (memspaceOut == "cpu")){ control = 2;}
-    if ((memspaceIn == "cuda") && (memspaceOut == "cuda")){ control = 3;}
+    if ((memspaceIn == "cpu") && ((memspaceOut == "cuda") || (memspaceOut == "hip"))){ control = 1;}
+    if (((memspaceIn == "cuda") || (memspaceIn == "hip")) && (memspaceOut == "cpu")){ control = 2;}
+    if (((memspaceIn == "cuda") || (memspaceIn == "hip")) && ((memspaceOut == "cuda") || (memspaceOut == "hip"))){ control = 3;}
 
     if ((memspaceOut == "cpu") && (h_data_ == nullptr)){
       //allocate first
       h_data_ = new real_type[n_ * k_]; 
     }
-    if ((memspaceOut == "cuda") && (d_data_ == nullptr)){
+    if (((memspaceOut == "cuda") || (memspaceOut == "hip")) && (d_data_ == nullptr)){
       //allocate first
       mem_.allocateArrayOnDevice(&d_data_, n_ * k_);
     } 
@@ -109,19 +109,19 @@ namespace ReSolve { namespace vector {
         cpu_updated_ = true;
         gpu_updated_ = false;
         break;
-      case 2: //cuda->cpu
+      case 2: //gpu->cpu
         mem_.copyArrayDeviceToHost(h_data_, data, n_current_ * k_);
         owns_gpu_data_ = true;
         cpu_updated_ = true;
         gpu_updated_ = false;
         break;
-      case 1: //cpu->cuda
+      case 1: //cpu->gpu
         mem_.copyArrayHostToDevice(d_data_, data, n_current_ * k_);
         owns_gpu_data_ = true;
         gpu_updated_ = true;
         cpu_updated_ = false;
         break;
-      case 3: //cuda->cuda
+      case 3: //gpu->gpu
         mem_.copyArrayDeviceToDevice(d_data_, data, n_current_ * k_);
         owns_gpu_data_ = true;
         gpu_updated_ = true;
@@ -141,18 +141,18 @@ namespace ReSolve { namespace vector {
   real_type* Vector::getData(index_type i, std::string memspace)
   {
     if ((memspace == "cpu") && (cpu_updated_ == false) && (gpu_updated_ == true )) {
-      copyData("cuda", "cpu");
+      copyData(memspace, "cpu");
       owns_cpu_data_ = true;
     } 
 
-    if ((memspace == "cuda") && (gpu_updated_ == false) && (cpu_updated_ == true )) {
-      copyData("cpu", "cuda");
+    if (((memspace == "cuda") || (memspace == "hip")) && (gpu_updated_ == false) && (cpu_updated_ == true )) {
+      copyData("cpu", memspace);
       owns_gpu_data_ = true;
     }
     if (memspace == "cpu") {
       return &h_data_[i * n_current_];
     } else {
-      if (memspace == "cuda"){
+      if ((memspace == "cuda") || (memspace == "hip")){
         return &d_data_[i * n_current_];
       } else {
         return nullptr;
@@ -164,14 +164,14 @@ namespace ReSolve { namespace vector {
   int Vector::copyData(std::string memspaceIn, std::string memspaceOut)
   {
     int control=-1;
-    if ((memspaceIn == "cpu") && (memspaceOut == "cuda")){ control = 0;}
-    if ((memspaceIn == "cuda") && (memspaceOut == "cpu")){ control = 1;}
+    if ((memspaceIn == "cpu") && ((memspaceOut == "cuda") || (memspaceOut == "hip"))){ control = 0;}
+    if (((memspaceIn == "cuda") || (memspaceIn == "hip")) && (memspaceOut == "cpu")){ control = 1;}
 
     if ((memspaceOut == "cpu") && (h_data_ == nullptr)){
       //allocate first
       h_data_ = new real_type[n_ * k_]; 
     }
-    if ((memspaceOut == "cuda") && (d_data_ == nullptr)){
+    if (((memspaceOut == "cuda") || (memspaceOut == "hip")) && (d_data_ == nullptr)){
       //allocate first
       mem_.allocateArrayOnDevice(&d_data_, n_ * k_);
     } 
@@ -200,10 +200,12 @@ namespace ReSolve { namespace vector {
       h_data_ = new real_type[n_ * k_]; 
       owns_cpu_data_ = true;
     } else {
-      if (memspace == "cuda") {
+      if ((memspace == "cuda") || (memspace == "hip")) {
         mem_.deleteOnDevice(d_data_);
         mem_.allocateArrayOnDevice(&d_data_, n_ * k_);
         owns_gpu_data_ = true;
+      } else {
+        std::cout<<"wrong memspace " <<memspace<<" "<<std::endl;
       }
     }
   }
@@ -220,7 +222,7 @@ namespace ReSolve { namespace vector {
         h_data_[i] = 0.0;
       }
     } else {
-      if (memspace == "cuda") {
+      if ((memspace == "cuda") || (memspace == "hip")) {
         if (d_data_ == nullptr) {
           mem_.allocateArrayOnDevice(&d_data_, n_ * k_);
           owns_gpu_data_ = true;
@@ -241,7 +243,7 @@ namespace ReSolve { namespace vector {
         h_data_[i] = 0.0;
       }
     } else {
-      if (memspace == "cuda") {
+      if ((memspace == "cuda") || (memspace == "hip")) {
         if (d_data_ == nullptr) {
           mem_.allocateArrayOnDevice(&d_data_, n_ * k_);
           owns_gpu_data_ = true;
@@ -263,7 +265,7 @@ namespace ReSolve { namespace vector {
         h_data_[i] = C;
       }
     } else {
-      if (memspace == "cuda") {
+      if ((memspace == "cuda") || (memspace == "hip")) {
         if (d_data_ == nullptr) {
           mem_.allocateArrayOnDevice(&d_data_, n_ * k_);
           owns_gpu_data_ = true;
@@ -284,7 +286,7 @@ namespace ReSolve { namespace vector {
         h_data_[i] = C;
       }
     } else {
-      if (memspace == "cuda") {
+      if ((memspace == "cuda") || (memspace == "hip")) {
         if (d_data_ == nullptr) {
           mem_.allocateArrayOnDevice(&d_data_, n_ * k_);
           owns_gpu_data_ = true;
@@ -322,7 +324,7 @@ namespace ReSolve { namespace vector {
       if (memspaceOut == "cpu") {
         std::memcpy(dest, data, n_current_ * sizeof(real_type));
       } else {
-        if (memspaceOut == "cuda") { 
+      if ((memspaceOut == "cuda") || (memspaceOut == "hip")) {
           mem_.copyArrayDeviceToDevice(dest, data, n_current_);
         } else {
           //error
@@ -338,7 +340,7 @@ namespace ReSolve { namespace vector {
     if (memspaceOut == "cpu") {
       std::memcpy(dest, data, n_current_ * k_ * sizeof(real_type));
     } else {
-      if (memspaceOut == "cuda") { 
+      if ((memspaceOut == "cuda") || (memspaceOut == "hip")) {
         mem_.copyArrayDeviceToDevice(dest, data, n_current_ * k_);
       } else {
         //error
diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp
index 8c89cb2f..f797f483 100644
--- a/resolve/vector/VectorHandler.cpp
+++ b/resolve/vector/VectorHandler.cpp
@@ -11,6 +11,9 @@
 #ifdef RESOLVE_USE_CUDA
 #include <resolve/vector/VectorHandlerCuda.hpp>
 #endif
+#ifdef RESOLVE_USE_HIP
+#include <resolve/vector/VectorHandlerHip.hpp>
+#endif
 
 namespace ReSolve {
   using out = io::Logger;
@@ -50,6 +53,21 @@ namespace ReSolve {
     isCpuEnabled_  = true;
   }
 #endif
+#ifdef RESOLVE_USE_HIP
+  /** 
+   * @brief constructor
+   * 
+   * @param new_workspace - workspace to be set     
+   */
+  VectorHandler::VectorHandler(LinAlgWorkspaceHIP* new_workspace)
+  {
+    hipImpl_ = new VectorHandlerHip(new_workspace);
+    cpuImpl_  = new  VectorHandlerCpu();
+
+    isHipEnabled_ = true;
+    isCpuEnabled_  = true;
+  }
+#endif
 
   /** 
    * @brief destructor     
@@ -64,7 +82,7 @@ namespace ReSolve {
    * 
    * @param[in] x The first vector
    * @param[in] y The second vector
-   * @param[in] memspace String containg memspace (cpu or cuda)
+   * @param[in] memspace String containg memspace (cpu or cuda or hip)
    * 
    * @return dot product (real number) of _x_ and _y_
    */
@@ -74,7 +92,9 @@ namespace ReSolve {
     if (memspace == "cuda" ) {
       return cudaImpl_->dot(x, y);
     } else {
-      if (memspace == "cpu") {
+      if (memspace == "hip") { 
+        return hipImpl_->dot(x, y);
+      } else if (memspace == "cpu") {
         return cpuImpl_->dot(x, y);
       } else {
         out::error() << "Not implemented (yet)" << std::endl;
@@ -88,13 +108,15 @@ namespace ReSolve {
    * 
    * @param[in] alpha The constant
    * @param[in,out] x The vector
-   * @param memspace string containg memspace (cpu or cuda)
+   * @param memspace string containg memspace (cpu or cuda or hip)
    * 
    */
   void VectorHandler::scal(const real_type* alpha, vector::Vector* x, std::string memspace)
   {
     if (memspace == "cuda" ) {
       cudaImpl_->scal(alpha, x);
+    } else if (memspace == "hip") { 
+      hipImpl_->scal(alpha, x);
     } else {
       if (memspace == "cpu") {
         cpuImpl_->scal(alpha, x);
@@ -110,7 +132,7 @@ namespace ReSolve {
    * @param[in] alpha The constant
    * @param[in] x The first vector
    * @param[in,out] y The second vector (result is return in y)
-   * @param[in]  memspace String containg memspace (cpu or cuda)
+   * @param[in]  memspace String containg memspace (cpu or cuda or hip)
    * 
    */
   void VectorHandler::axpy(const  real_type* alpha, vector::Vector* x, vector::Vector* y, std::string memspace)
@@ -119,10 +141,14 @@ namespace ReSolve {
     if (memspace == "cuda" ) {
       cudaImpl_->axpy(alpha, x, y);
     } else {
-      if (memspace == "cpu") {
-        cpuImpl_->axpy(alpha, x, y);
+      if (memspace == "hip" ) {
+        hipImpl_->axpy(alpha, x, y);      
       } else {
-        out::error() <<"Not implemented (yet)" << std::endl;
+        if (memspace == "cpu") {
+          cpuImpl_->axpy(alpha, x, y);
+        } else {
+          out::error() <<"Not implemented (yet)" << std::endl;
+        }
       }
     }
   }
@@ -139,7 +165,7 @@ namespace ReSolve {
    * @param[in] V Multivector containing the matrix, organized columnwise
    * @param[in] y Vector, k x 1 if N and n x 1 if T
    * @param[in,out] x Vector, n x 1 if N and k x 1 if T
-   * @param[in] memspace  cpu or cuda (for now)
+   * @param[in] memspace  cpu or cuda or hip (for now)
    *
    * @pre   V is stored colum-wise, _n_ > 0, _k_ > 0
    * 
@@ -148,6 +174,8 @@ namespace ReSolve {
   {
     if (memspace == "cuda") {
       cudaImpl_->gemv(transpose, n, k, alpha, beta, V, y, x);
+    } else if (memspace == "hip") {
+      hipImpl_->gemv(transpose, n, k, alpha, beta, V, y, x);
     } else if (memspace == "cpu") {
       cpuImpl_->gemv(transpose, n, k, alpha, beta, V, y, x);
     } else {
@@ -162,7 +190,7 @@ namespace ReSolve {
    * @param[in] alpha vector size k x 1
    * @param[in] x (multi)vector size size x k
    * @param[in,out] y vector size size x 1 (this is where the result is stored)
-   * @param[in] memspace string containg memspace (cpu or cuda)
+   * @param[in] memspace string containg memspace (cpu or cuda or hip)
    *
    * @pre   _k_ > 0, _size_ > 0, _size_ = x->getSize()
    *
@@ -172,6 +200,8 @@ namespace ReSolve {
     using namespace constants;
     if (memspace == "cuda") {
       cudaImpl_->massAxpy(size, alpha, k, x, y);
+    } else if (memspace == "hip") {
+      hipImpl_->massAxpy(size, alpha, k, x, y);
     } else if (memspace == "cpu") {
       cpuImpl_->massAxpy(size, alpha, k, x, y);
     } else {
@@ -188,7 +218,7 @@ namespace ReSolve {
    * @param[in] k Number of vectors in V
    * @param[in] x Multivector; 2 vectors size n x 1 each
    * @param[out] res Multivector; 2 vectors size k x 1 each (result is returned in res)
-   * @param[in] memspace String containg memspace (cpu or cuda)
+   * @param[in] memspace String containg memspace (cpu or cuda or hip)
    *
    * @pre   _size_ > 0, _k_ > 0, size = x->getSize(), _res_ needs to be allocated
    *
@@ -197,6 +227,8 @@ namespace ReSolve {
   {
     if (memspace == "cuda") {
       cudaImpl_->massDot2Vec(size, V, k, x, res);
+    } else if (memspace == "hip") {
+      hipImpl_->massDot2Vec(size, V, k, x, res);
     } else if (memspace == "cpu") {
       cpuImpl_->massDot2Vec(size, V, k, x, res);
     } else {
diff --git a/resolve/vector/VectorHandler.hpp b/resolve/vector/VectorHandler.hpp
index c17d4688..02d426b5 100644
--- a/resolve/vector/VectorHandler.hpp
+++ b/resolve/vector/VectorHandler.hpp
@@ -10,6 +10,7 @@ namespace ReSolve
   class VectorHandlerImpl;
   class LinAlgWorkspaceCpu;
   class LinAlgWorkspaceCUDA;
+  class LinAlgWorkspaceHIP;
 }
 
 
@@ -19,6 +20,7 @@ namespace ReSolve { //namespace vector {
       VectorHandler();
       VectorHandler(LinAlgWorkspaceCpu* new_workspace);
       VectorHandler(LinAlgWorkspaceCUDA* new_workspace);
+      VectorHandler(LinAlgWorkspaceHIP* new_workspace);
       ~VectorHandler();
 
       //y = alpha x + y
@@ -55,9 +57,11 @@ namespace ReSolve { //namespace vector {
     private:
       VectorHandlerImpl*  cpuImpl_{nullptr};
       VectorHandlerImpl* cudaImpl_{nullptr};
+      VectorHandlerImpl*  hipImpl_{nullptr};
 
       bool isCpuEnabled_{false};
       bool isCudaEnabled_{false};
+      bool isHipEnabled_{false};
   };
 
 } //} // namespace ReSolve::vector
diff --git a/resolve/vector/VectorHandlerHip.cpp b/resolve/vector/VectorHandlerHip.cpp
new file mode 100644
index 00000000..9f2927c7
--- /dev/null
+++ b/resolve/vector/VectorHandlerHip.cpp
@@ -0,0 +1,236 @@
+#include <iostream>
+
+#include <resolve/utilities/logger/Logger.hpp>
+#include <resolve/hip/hipKernels.h>
+#include <resolve/vector/Vector.hpp>
+#include <resolve/workspace/LinAlgWorkspace.hpp>
+#include <resolve/vector/VectorHandlerImpl.hpp>
+#include "VectorHandlerHip.hpp"
+
+namespace ReSolve {
+  using out = io::Logger;
+
+  /** 
+   * @brief empty constructor that does absolutely nothing        
+   */
+  VectorHandlerHip::VectorHandlerHip()
+  {
+  }
+
+  /** 
+   * @brief constructor
+   * 
+   * @param new_workspace - workspace to be set     
+   */
+  VectorHandlerHip:: VectorHandlerHip(LinAlgWorkspaceHIP* new_workspace)
+  {
+    workspace_ = new_workspace;
+  }
+
+  /** 
+   * @brief destructor     
+   */
+  VectorHandlerHip::~VectorHandlerHip()
+  {
+    //delete the workspace TODO
+  }
+
+  /** 
+   * @brief dot product of two vectors i.e, a = x^Ty
+   * 
+   * @param[in] x The first vector
+   * @param[in] y The second vector
+   * @param[in] memspace String containg memspace (cpu or hip)
+   * 
+   * @return dot product (real number) of _x_ and _y_
+   */
+
+  real_type VectorHandlerHip::dot(vector::Vector* x, vector::Vector* y)
+  { 
+    LinAlgWorkspaceHIP* workspaceHIP = workspace_;
+    rocblas_handle  handle_rocblas =  workspaceHIP->getRocblasHandle();
+    double nrm = 0.0;
+    rocblas_status st= rocblas_ddot (handle_rocblas,  x->getSize(), x->getData("hip"), 1, y->getData("hip"), 1, &nrm);
+    if (st!=0) {printf("dot product crashed with code %d \n", st);}
+    return nrm;
+  }
+
+  /** 
+   * @brief scale a vector by a constant i.e, x = alpha*x where alpha is a constant
+   * 
+   * @param[in] alpha The constant
+   * @param[in,out] x The vector
+   * @param memspace string containg memspace (cpu or hip)
+   * 
+   */
+  void VectorHandlerHip::scal(const real_type* alpha, vector::Vector* x)
+  {
+    LinAlgWorkspaceHIP* workspaceHIP = workspace_;
+    rocblas_handle handle_rocblas =  workspaceHIP->getRocblasHandle();
+    rocblas_status st = rocblas_dscal(handle_rocblas, x->getSize(), alpha, x->getData("hip"), 1);
+    if (st!=0) {
+      ReSolve::io::Logger::error() << "scal crashed with code " << st << "\n";
+    }
+  }
+
+  /** 
+   * @brief axpy i.e, y = alpha*x+y where alpha is a constant
+   * 
+   * @param[in] alpha The constant
+   * @param[in] x The first vector
+   * @param[in,out] y The second vector (result is return in y)
+   * @param[in]  memspace String containg memspace (cpu or hip)
+   * 
+   */
+  void VectorHandlerHip::axpy(const  real_type* alpha, vector::Vector* x, vector::Vector* y)
+  {
+    //AXPY:  y = alpha * x + y
+    LinAlgWorkspaceHIP* workspaceHIP = workspace_;
+    rocblas_handle handle_rocblas =  workspaceHIP->getRocblasHandle();
+    rocblas_daxpy(handle_rocblas,
+                  x->getSize(),
+                  alpha,
+                  x->getData("hip"),
+                  1,
+                  y->getData("hip"),
+                  1);
+  }
+
+  /** 
+   * @brief gemv computes matrix-vector product where both matrix and vectors are dense.
+   *        i.e., x = beta*x +  alpha*V*y
+   *
+   * @param[in] Transpose - yes (T) or no (N)
+   * @param[in] n Number of rows in (non-transposed) matrix
+   * @param[in] k Number of columns in (non-transposed)   
+   * @param[in] alpha Constant real number
+   * @param[in] beta Constant real number
+   * @param[in] V Multivector containing the matrix, organized columnwise
+   * @param[in] y Vector, k x 1 if N and n x 1 if T
+   * @param[in,out] x Vector, n x 1 if N and k x 1 if T
+   * @param[in] memspace  cpu or hip (for now)
+   *
+   * @pre   V is stored colum-wise, _n_ > 0, _k_ > 0
+   * 
+   */  
+  void VectorHandlerHip::gemv(std::string transpose,
+                              index_type n,
+                              index_type k,
+                              const real_type* alpha,
+                              const real_type* beta,
+                              vector::Vector* V,
+                              vector::Vector* y,
+                              vector::Vector* x)
+  {
+    LinAlgWorkspaceHIP* workspaceHIP = workspace_;
+    rocblas_handle handle_rocblas =  workspaceHIP->getRocblasHandle();
+    if (transpose == "T") {
+
+      rocblas_dgemv(handle_rocblas,
+                    rocblas_operation_transpose,
+                    n,
+                    k,
+                    alpha,
+                    V->getData("hip"),
+                    n,
+                    y->getData("hip"),
+                    1,
+                    beta,
+                    x->getData("hip"),
+                    1);
+
+    } else {
+      rocblas_dgemv(handle_rocblas,
+                    rocblas_operation_none,
+                    n,
+                    k,
+                    alpha,
+                    V->getData("hip"),
+                    n,
+                    y->getData("hip"),
+                    1,
+                    beta,
+                    x->getData("hip"),
+                    1);
+    }
+  }
+
+  /** 
+   * @brief mass (bulk) axpy i.e, y = y - x*alpha where  alpha is a vector
+   * 
+   * @param[in] size number of elements in y
+   * @param[in] alpha vector size k x 1
+   * @param[in] x (multi)vector size size x k
+   * @param[in,out] y vector size size x 1 (this is where the result is stored)
+   * @param[in] memspace string containg memspace (cpu or hip)
+   *
+   * @pre   _k_ > 0, _size_ > 0, _size_ = x->getSize()
+   *
+   */
+  void VectorHandlerHip::massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y)
+  {
+    using namespace constants;
+    if (k < 200) {
+      mass_axpy(size, k, x->getData("hip"), y->getData("hip"),alpha->getData("hip"));
+    } else {
+      LinAlgWorkspaceHIP* workspaceHIP = workspace_;
+      rocblas_handle handle_rocblas =  workspaceHIP->getRocblasHandle();
+      rocblas_dgemm(handle_rocblas,
+                    rocblas_operation_none,
+                    rocblas_operation_none,
+                    size,       // m
+                    1,          // n
+                    k,      // k
+                    &MINUSONE, // alpha
+                    x->getData("hip"), // A
+                    size,       // lda
+                    alpha->getData("hip"), // B
+                    k,      // ldb
+                    &ONE,
+                    y->getData("hip"),          // c
+                    size);      // ldc     
+    }
+  }
+
+  /** 
+   * @brief mass (bulk) dot product i.e,  V^T x, where V is n x k dense multivector
+   * (a dense multivector consisting of k vectors size n) and x is k x 2 dense
+   * multivector (a multivector consisiting of two vectors size n each)
+   * 
+   * @param[in] size Number of elements in a single vector in V
+   * @param[in] V Multivector; k vectors size n x 1 each
+   * @param[in] k Number of vectors in V
+   * @param[in] x Multivector; 2 vectors size n x 1 each
+   * @param[out] res Multivector; 2 vectors size k x 1 each (result is returned in res)
+   * @param[in] memspace String containg memspace (cpu or hip)
+   *
+   * @pre   _size_ > 0, _k_ > 0, size = x->getSize(), _res_ needs to be allocated
+   *
+   */
+  void VectorHandlerHip::massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res)
+  {
+    using namespace constants;
+
+    if (k < 200) {
+      mass_inner_product_two_vectors(size, k, x->getData("hip") , x->getData(1, "hip"), V->getData("hip"), res->getData("hip"));
+    } else {
+      LinAlgWorkspaceHIP* workspaceHIP = workspace_;
+      rocblas_handle handle_rocblas =  workspaceHIP->getRocblasHandle();
+      rocblas_dgemm(handle_rocblas,
+                    rocblas_operation_transpose,
+                    rocblas_operation_none,
+                    k + 1,   //m
+                    2,       //n
+                    size,    //k
+                    &ONE,   //alpha
+                    V->getData("hip"),       //A
+                    size,    //lda
+                    x->getData("hip"),       //B
+                    size,    //ldb
+                    &ZERO,
+                    res->getData("hip"),     //c
+                    k + 1);  //ldc 
+    }
+  }
+
+} // namespace ReSolve
diff --git a/resolve/vector/VectorHandlerHip.hpp b/resolve/vector/VectorHandlerHip.hpp
new file mode 100644
index 00000000..7e5085e3
--- /dev/null
+++ b/resolve/vector/VectorHandlerHip.hpp
@@ -0,0 +1,57 @@
+#pragma once
+#include <string>
+
+namespace ReSolve
+{ 
+  namespace vector
+  {
+    class Vector;
+  }
+  class LinAlgWorkspaceHIP;
+  class VectorHandlerImpl;
+}
+
+
+namespace ReSolve { //namespace vector {
+  class VectorHandlerHip : public VectorHandlerImpl
+  { 
+    public:
+      VectorHandlerHip();
+      VectorHandlerHip(LinAlgWorkspaceHIP* workspace);
+      virtual ~VectorHandlerHip();
+
+      //y = alpha x + y
+      virtual void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y);
+
+      //dot: x \cdot y
+      virtual real_type dot(vector::Vector* x, vector::Vector* y);
+
+      //scal = alpha * x
+      virtual void scal(const real_type* alpha, vector::Vector* x);
+
+      //mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise
+      virtual void massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y);
+
+      //mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise
+      //Size = n
+      virtual void massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res);
+
+      /** gemv:
+       * if `transpose = N` (no), `x = beta*x +  alpha*V*y`,
+       * where `x` is `[n x 1]`, `V` is `[n x k]` and `y` is `[k x 1]`.
+       * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`,
+       * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`.
+       */ 
+      virtual void gemv(std::string transpose,
+                        index_type n,
+                        index_type k,
+                        const real_type* alpha,
+                        const real_type* beta,
+                        vector::Vector* V,
+                        vector::Vector* y,
+                        vector::Vector* x);
+    private:
+      LinAlgWorkspaceHIP* workspace_;
+  };
+
+} //} // namespace ReSolve::vector
diff --git a/resolve/workspace/CMakeLists.txt b/resolve/workspace/CMakeLists.txt
index 673fac4b..a34c2191 100644
--- a/resolve/workspace/CMakeLists.txt
+++ b/resolve/workspace/CMakeLists.txt
@@ -16,10 +16,15 @@ set(ReSolve_Workspace_CUDASDK_SRC
     LinAlgWorkspaceCUDA.cpp
 )
 
+set(ReSolve_Workspace_ROCM_SRC
+  LinAlgWorkspaceHIP.cpp
+)
+
 set(ReSolve_Workspace_HEADER_INSTALL
   LinAlgWorkspace.hpp
   LinAlgWorkspaceCpu.hpp
   LinAlgWorkspaceCUDA.hpp
+  LinAlgWorkspaceHIP.hpp
 )
 
 # If cuda is enabled, add CUDA SDK workspace files
@@ -27,6 +32,10 @@ if(RESOLVE_USE_CUDA)
   set(ReSolve_Workspace_SRC ${ReSolve_Workspace_SRC} ${ReSolve_Workspace_CUDASDK_SRC})
 endif()
 
+if(RESOLVE_USE_HIP)
+  set(ReSolve_Workspace_SRC ${ReSolve_Workspace_SRC} ${ReSolve_Workspace_ROCM_SRC})
+endif()
+
 add_library(resolve_workspace SHARED ${ReSolve_Workspace_SRC})
 
 # If CUDA is enabled, link to ReSolve CUDA backend
@@ -34,6 +43,10 @@ if(RESOLVE_USE_CUDA)
   target_link_libraries(resolve_workspace PUBLIC resolve_backend_cuda)
 endif(RESOLVE_USE_CUDA)  
 
+if(RESOLVE_USE_HIP)
+  target_link_libraries(resolve_workspace PUBLIC resolve_backend_hip)
+endif(RESOLVE_USE_HIP)  
+
 target_include_directories(resolve_workspace INTERFACE
     $<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}>
     $<INSTALL_INTERFACE:include>
diff --git a/resolve/workspace/LinAlgWorkspace.hpp b/resolve/workspace/LinAlgWorkspace.hpp
index 6da58fda..4efe834e 100644
--- a/resolve/workspace/LinAlgWorkspace.hpp
+++ b/resolve/workspace/LinAlgWorkspace.hpp
@@ -6,3 +6,7 @@
 #include <resolve/workspace/LinAlgWorkspaceCUDA.hpp>
 #endif
 
+#ifdef RESOLVE_USE_HIP
+#include <resolve/workspace/LinAlgWorkspaceHIP.hpp>
+#endif
+
diff --git a/resolve/workspace/LinAlgWorkspaceHIP.cpp b/resolve/workspace/LinAlgWorkspaceHIP.cpp
new file mode 100644
index 00000000..e64dff17
--- /dev/null
+++ b/resolve/workspace/LinAlgWorkspaceHIP.cpp
@@ -0,0 +1,75 @@
+#include <resolve/workspace/LinAlgWorkspaceHIP.hpp>
+
+namespace ReSolve
+{
+  LinAlgWorkspaceHIP::LinAlgWorkspaceHIP()
+  {
+    handle_rocsparse_   = nullptr;
+    handle_rocblas_     = nullptr;
+
+    matvec_setup_done_ = false;
+  }
+
+  LinAlgWorkspaceHIP::~LinAlgWorkspaceHIP()
+  {
+    rocsparse_destroy_handle(handle_rocsparse_);
+    rocblas_destroy_handle(handle_rocblas_);
+    rocsparse_destroy_mat_descr(mat_A_);
+  }
+
+  rocsparse_handle LinAlgWorkspaceHIP::getRocsparseHandle()
+  {
+    return handle_rocsparse_;
+  }
+
+  void LinAlgWorkspaceHIP::setRocsparseHandle(rocsparse_handle handle)
+  {
+    handle_rocsparse_ = handle;
+  }
+
+  rocblas_handle LinAlgWorkspaceHIP::getRocblasHandle()
+  {
+    return handle_rocblas_;
+  }
+
+  void LinAlgWorkspaceHIP::setRocblasHandle(rocblas_handle handle)
+  {
+    handle_rocblas_ = handle;
+  }
+
+  rocsparse_mat_descr LinAlgWorkspaceHIP::getSpmvMatrixDescriptor()
+  {
+    return mat_A_;
+  }
+
+  void LinAlgWorkspaceHIP::setSpmvMatrixDescriptor(rocsparse_mat_descr mat)
+  {
+    mat_A_ = mat;
+  }
+
+  rocsparse_mat_info  LinAlgWorkspaceHIP::getSpmvMatrixInfo()
+  {
+    return info_A_;
+  }
+
+  void LinAlgWorkspaceHIP::setSpmvMatrixInfo(rocsparse_mat_info  info)
+  {
+    info_A_ = info;
+  }
+
+  bool LinAlgWorkspaceHIP::matvecSetup()
+  {
+    return matvec_setup_done_;
+  }
+
+  void LinAlgWorkspaceHIP::matvecSetupDone()
+  {
+    matvec_setup_done_ = true;
+  }
+
+  void LinAlgWorkspaceHIP::initializeHandles()
+  {
+    rocsparse_create_handle(&handle_rocsparse_);
+                            rocblas_create_handle(&handle_rocblas_);
+                            }
+                            } // namespace ReSolve
diff --git a/resolve/workspace/LinAlgWorkspaceHIP.hpp b/resolve/workspace/LinAlgWorkspaceHIP.hpp
new file mode 100644
index 00000000..fbb55349
--- /dev/null
+++ b/resolve/workspace/LinAlgWorkspaceHIP.hpp
@@ -0,0 +1,52 @@
+#pragma once
+
+#include <rocsparse/rocsparse.h>
+#include <rocblas/rocblas.h>
+#include <hip/hip_runtime.h>
+
+#include <resolve/MemoryUtils.hpp>
+
+namespace ReSolve
+{
+  class LinAlgWorkspaceHIP
+  {
+    public:
+      LinAlgWorkspaceHIP();
+      ~LinAlgWorkspaceHIP();
+
+      rocblas_handle getRocblasHandle();
+      rocsparse_handle getRocsparseHandle();      
+      rocsparse_mat_descr getSpmvMatrixDescriptor();
+      rocsparse_mat_info getSpmvMatrixInfo();
+
+      void setRocblasHandle(rocblas_handle handle);
+      void setRocsparseHandle(rocsparse_handle handle);
+      void setSpmvMatrixDescriptor(rocsparse_mat_descr mat);
+      void setSpmvMatrixInfo(rocsparse_mat_info info);
+
+      void initializeHandles();
+
+      bool matvecSetup();
+      void matvecSetupDone();
+
+    private:
+      //handles
+      rocblas_handle handle_rocblas_;
+      rocsparse_handle handle_rocsparse_;
+
+      //matrix descriptors
+      rocsparse_mat_descr  mat_A_; 
+
+      //vector descriptors not needed, rocsparse uses RAW pointers.
+
+      //buffers
+      // there is no buffer needed in matvec
+      bool matvec_setup_done_; //check if setup is done for matvec (note: no buffer but there is analysis)
+
+      //info - but we need info
+      rocsparse_mat_info  info_A_;
+
+      MemoryHandler mem_;
+  };
+
+} // namespace ReSolve
diff --git a/tests/unit/matrix/CMakeLists.txt b/tests/unit/matrix/CMakeLists.txt
index 8906c2c6..8476f181 100644
--- a/tests/unit/matrix/CMakeLists.txt
+++ b/tests/unit/matrix/CMakeLists.txt
@@ -20,4 +20,4 @@ install(TARGETS ${installable_tests}
         RUNTIME DESTINATION bin/resolve/tests/unit)
 
 add_test(NAME matrix_test         COMMAND $<TARGET_FILE:runMatrixIoTests.exe>)
-add_test(NAME matrix_handler_test COMMAND $<TARGET_FILE:runMatrixHandlerTests.exe>)
\ No newline at end of file
+add_test(NAME matrix_handler_test COMMAND $<TARGET_FILE:runMatrixHandlerTests.exe>)
diff --git a/tests/unit/matrix/MatrixHandlerTests.hpp b/tests/unit/matrix/MatrixHandlerTests.hpp
index e203017a..d7fe8449 100644
--- a/tests/unit/matrix/MatrixHandlerTests.hpp
+++ b/tests/unit/matrix/MatrixHandlerTests.hpp
@@ -49,6 +49,7 @@ class MatrixHandlerTests : TestBase
     vector::Vector x(N);
     vector::Vector y(N);
     x.allocate(memspace_);
+    if (x.getData(memspace_) == NULL) printf("oups we have an issue \n");
     y.allocate(memspace_);
 
     x.setToConst(1.0, memspace_);
@@ -80,6 +81,12 @@ class MatrixHandlerTests : TestBase
       LinAlgWorkspaceCUDA* workspace = new LinAlgWorkspaceCUDA();
       workspace->initializeHandles();
       return new MatrixHandler(workspace);
+#endif
+#ifdef RESOLVE_USE_HIP
+    } else if (memspace_ == "hip") {
+      LinAlgWorkspaceHIP* workspace = new LinAlgWorkspaceHIP();
+      workspace->initializeHandles();
+      return new MatrixHandler(workspace);
 #endif
     } else {
       std::cout << "ReSolve not built with support for memory space " << memspace_ << "\n";
@@ -152,7 +159,7 @@ class MatrixHandlerTests : TestBase
     A->setUpdated("cpu");
     // std::cout << rowptr[i] << "\n";
 
-    if (memspace == "cuda") {
+    if ((memspace == "cuda") || (memspace == "hip")) {
       A->copyData(memspace);
     }
 
diff --git a/tests/unit/matrix/runMatrixHandlerTests.cpp b/tests/unit/matrix/runMatrixHandlerTests.cpp
index 6eee90d5..26ad70b0 100644
--- a/tests/unit/matrix/runMatrixHandlerTests.cpp
+++ b/tests/unit/matrix/runMatrixHandlerTests.cpp
@@ -33,5 +33,17 @@ int main(int, char**)
   }
 #endif
 
+#ifdef RESOLVE_USE_HIP
+  {
+    std::cout << "Running tests with HIP backend:\n";
+    ReSolve::tests::MatrixHandlerTests test("hip");
+
+    result += test.matrixHandlerConstructor();
+    result += test.matrixOneNorm();
+    result += test.matVec(50);
+
+    std::cout << "\n";
+  }
+#endif
   return result.summary();
 }
diff --git a/tests/unit/vector/VectorHandlerTests.hpp b/tests/unit/vector/VectorHandlerTests.hpp
index d2f8c73c..60020ec5 100644
--- a/tests/unit/vector/VectorHandlerTests.hpp
+++ b/tests/unit/vector/VectorHandlerTests.hpp
@@ -1,6 +1,7 @@
 #pragma once
 #include <string>
 #include <vector>
+#include <iomanip>
 #include <sstream>
 #include <iterator>
 #include <algorithm>
@@ -141,13 +142,13 @@ namespace ReSolve {
             }
             x->setToConst(ii, c, memspace_);
           }
+
           index_type r = K % 2;
           real_type res = (real_type) ((floor((real_type) K / 2.0) + r) * 1.0 + floor((real_type) K / 2.0) * (-0.5));
 
           handler->massAxpy(N, alpha, K, x, y, memspace_);
           status *= verifyAnswer(y, 2.0 - res, memspace_);
-
-
+         
           delete handler;
           delete x;
           delete y;
@@ -229,6 +230,12 @@ namespace ReSolve {
             LinAlgWorkspaceCUDA* workspace = new LinAlgWorkspaceCUDA();
             workspace->initializeHandles();
             return new VectorHandler(workspace);
+#endif
+#ifdef RESOLVE_USE_HIP
+          } else if (memspace_ == "hip") {
+            LinAlgWorkspaceHIP* workspace = new LinAlgWorkspaceHIP();
+            workspace->initializeHandles();
+            return new VectorHandler(workspace);
 #endif
           } else {
             std::cout << "ReSolve not built with support for memory space " << memspace_ << "\n";
@@ -247,6 +254,7 @@ namespace ReSolve {
           for (index_type i = 0; i < x->getSize(); ++i) {
             // std::cout << x->getData("cpu")[i] << "\n";
             if (!isEqual(x->getData("cpu")[i], answer)) {
+              std::cout << std::setprecision(16);
               status = false;
               std::cout << "Solution vector element x[" << i << "] = " << x->getData("cpu")[i]
                 << ", expected: " << answer << "\n";
diff --git a/tests/unit/vector/runVectorHandlerTests.cpp b/tests/unit/vector/runVectorHandlerTests.cpp
index 77e99471..9bb543a5 100644
--- a/tests/unit/vector/runVectorHandlerTests.cpp
+++ b/tests/unit/vector/runVectorHandlerTests.cpp
@@ -37,5 +37,22 @@ int main(int, char**)
   }
 #endif
 
+#ifdef RESOLVE_USE_HIP
+  {
+    std::cout << "Running tests with HIP backend:\n";
+    ReSolve::tests::VectorHandlerTests test("hip");
+
+    result += test.dot(5000);
+    result += test.axpy(5000);
+    result += test.scal(5000);
+    result += test.gemv(5000, 10);
+    result += test.massAxpy(100, 10);
+    result += test.massAxpy(1000, 300);
+    result += test.massDot(100, 10);
+    result += test.massDot(1000, 30);
+
+    std::cout << "\n";
+  }
+#endif
   return result.summary();
 }