runtime lib

cornell-zhang · Feb 6, 2024 · 1572f84 · 1572f84
1 parent 3b09a99
commit 1572f84
Show file tree

Hide file tree

Showing 26 changed files with 3,176 additions and 35 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,7 +1,7 @@
 build/*
 /build*
-# /scripts*
-# scripts/*
+/scripts*
+scripts/*
 evaluation/CPU/build*
 storage-src/1
 /eigen
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,8 @@
+{
+    "files.associations": {
+        "random": "cpp",
+        "cmath": "cpp",
+        "iostream": "cpp",
+        "ostream": "cpp"
+    }
+}
diff --git a/README.pdf b/README.pdf
diff --git a/evaluation/FormatCustomization/PageRank/Intel_MKL/Makefile b/evaluation/FormatCustomization/PageRank/Intel_MKL/Makefile
@@ -0,0 +1,87 @@
+# TACO_INCLUDE_DIR=/home/ubuntu/efs/software/taco/include
+# TACO_LIBRARY_DIR=/home/ubuntu/efs/software/taco/build/lib
+
+# GBLAS_INCLUDE_DIR=/home/ubuntu/project/GraphBLAS/include
+# GBLAS_LIBRARY_DIR=/home/ubuntu/project/GraphBLAS/lib
+
+#MKL_INCLUDE_DIR=/work/shared/common/CAD_tool/Intel/mkl/mkl/include
+#MKL_LIBRARY_DIR=/work/shared/common/CAD_tool/Intel/mkl/mkl/lib/intel64
+
+# Locations of the MKL headers and static libraries used by every rule below.
+# These point at a oneAPI install under /opt/intel; edit them (or override them
+# on the make command line) when MKL lives elsewhere.
+MKL_INCLUDE_DIR=/opt/intel/oneapi/mkl/latest/include
+MKL_LIBRARY_DIR=/opt/intel/oneapi/mkl/latest/lib/intel64
+
+# CNPY_INCLUDE_DIR=/work/shared/common/project_build/graphblas/software/cnpy
+# CNPY_LIBRARY_DIR=/work/shared/common/project_build/graphblas/software/cnpy/build
+
+BUILD_DIR=./build
+
+# taco_sddmm: taco_sddmm.cpp
+# 	mkdir -p $(BUILD_DIR)
+# 	g++ -std=c++14 -O3 -I${TACO_INCLUDE_DIR} -L${TACO_LIBRARY_DIR} taco_sddmm.cpp -o $(BUILD_DIR)/taco_sddmm -ltaco
+
+# graphblas_spmm: graphblas_spmm.cpp
+# 	mkdir -p $(BUILD_DIR)
+# 	g++ -std=c++11 -O3 -I${GBLAS_INCLUDE_DIR} -Wl,-rpath,${GBLAS_LIBRARY_DIR} -L${GBLAS_LIBRARY_DIR} \
+# 		graphblas_spmm.cpp -o $(BUILD_DIR)/graphblas_spmm -llagraph -lgraphblas -lm -lcnpy
+
+# Build mkl_spmm.cpp into $(BUILD_DIR)/mkl_spmm, linking the static MKL
+# archives (LP64 interface, core, GNU threading) inside one linker group.
+# Extra compiler flags are taken from CXXFLAGS. The target name is not the
+# output path, so make re-runs this recipe on every invocation.
+mkl_spmm: mkl_spmm.cpp
+	mkdir -p $(BUILD_DIR)
+	g++ -std=c++11 -O3 -I${MKL_INCLUDE_DIR} \
+		${CXXFLAGS} \
+		-Wl,--no-as-needed \
+		mkl_spmm.cpp -o $(BUILD_DIR)/mkl_spmm \
+		-Wl,--start-group ${MKL_LIBRARY_DIR}/libmkl_intel_lp64.a \
+		${MKL_LIBRARY_DIR}/libmkl_core.a ${MKL_LIBRARY_DIR}/libmkl_gnu_thread.a -Wl,--end-group \
+		-fopenmp -lpthread -lm -ldl
+
+# Build mkl_spmv.cpp into $(BUILD_DIR)/mkl_spmv, linking the static MKL
+# archives (LP64 interface, core, GNU threading) inside one linker group.
+# Extra compiler flags are taken from CXXFLAGS.
+mkl_spmv: mkl_spmv.cpp
+	mkdir -p $(BUILD_DIR)
+	g++ -std=c++11 -O3 -I${MKL_INCLUDE_DIR} \
+		${CXXFLAGS} \
+		-Wl,--no-as-needed \
+		mkl_spmv.cpp -o $(BUILD_DIR)/mkl_spmv \
+		-Wl,--start-group ${MKL_LIBRARY_DIR}/libmkl_intel_lp64.a \
+		${MKL_LIBRARY_DIR}/libmkl_core.a ${MKL_LIBRARY_DIR}/libmkl_gnu_thread.a -Wl,--end-group \
+		-fopenmp -lpthread -lm -ldl 
+
+# Build mkl_pagerank.cpp into $(BUILD_DIR)/mkl_pagerank, linking the static
+# MKL archives (LP64 interface, core, GNU threading) inside one linker group.
+# Extra compiler flags are taken from CXXFLAGS.
+mkl_pagerank: mkl_pagerank.cpp
+	mkdir -p $(BUILD_DIR)
+	g++ -std=c++11 -O3 -I${MKL_INCLUDE_DIR} \
+		${CXXFLAGS} \
+		-Wl,--no-as-needed \
+		mkl_pagerank.cpp -o $(BUILD_DIR)/mkl_pagerank \
+		-Wl,--start-group ${MKL_LIBRARY_DIR}/libmkl_intel_lp64.a \
+		${MKL_LIBRARY_DIR}/libmkl_core.a ${MKL_LIBRARY_DIR}/libmkl_gnu_thread.a -Wl,--end-group \
+		-fopenmp -lpthread -lm -ldl 
+
+# Build mkl_spgemm.cpp into $(BUILD_DIR)/mkl_spgemm, linking the static MKL
+# archives (LP64 interface, core, GNU threading) inside one linker group.
+# Extra compiler flags are taken from CXXFLAGS.
+# NOTE(review): mkl_spgemm.cpp initialises num_runs from an identifier named
+# `var`; presumably it is meant to be supplied here (e.g. CXXFLAGS=-Dvar=10)
+# or substituted by a script -- confirm.
+mkl_spgemm: mkl_spgemm.cpp
+	mkdir -p $(BUILD_DIR)
+	g++ -std=c++11 -O3 -I${MKL_INCLUDE_DIR} \
+				${CXXFLAGS} \
+                -Wl,--no-as-needed \
+                mkl_spgemm.cpp -o $(BUILD_DIR)/mkl_spgemm \
+                -Wl,--start-group ${MKL_LIBRARY_DIR}/libmkl_intel_lp64.a \
+                ${MKL_LIBRARY_DIR}/libmkl_core.a ${MKL_LIBRARY_DIR}/libmkl_gnu_thread.a -Wl,--end-group \
+                -fopenmp -lpthread -lm -ldl
+
+# Build mkl_spmspv.cpp into $(BUILD_DIR)/mkl_spmspv, linking the static MKL
+# archives (LP64 interface, core, GNU threading) inside one linker group.
+# CXXFLAGS is honoured here like in every other rule of this Makefile, so
+# user-supplied flags (defines, warnings, ...) are no longer silently dropped
+# for this one target.
+mkl_spmspv: mkl_spmspv.cpp
+	mkdir -p $(BUILD_DIR)
+	g++ -std=c++11 -O3 -I${MKL_INCLUDE_DIR} \
+		${CXXFLAGS} \
+		-Wl,--no-as-needed \
+		mkl_spmspv.cpp -o $(BUILD_DIR)/mkl_spmspv \
+		-Wl,--start-group ${MKL_LIBRARY_DIR}/libmkl_intel_lp64.a \
+		${MKL_LIBRARY_DIR}/libmkl_core.a ${MKL_LIBRARY_DIR}/libmkl_gnu_thread.a -Wl,--end-group \
+		-fopenmp -lpthread -lm -ldl
+
+# Build mkl_spgemm_lp.cpp into $(BUILD_DIR)/mkl_spgemm_lp, linking the static
+# MKL archives (LP64 interface, core, GNU threading) inside one linker group.
+# Extra compiler flags are taken from CXXFLAGS.
+mkl_spgemm_lp: mkl_spgemm_lp.cpp
+	mkdir -p $(BUILD_DIR)
+	g++ -std=c++11 -O3 -I${MKL_INCLUDE_DIR} \
+				${CXXFLAGS} \
+                -Wl,--no-as-needed \
+                mkl_spgemm_lp.cpp -o $(BUILD_DIR)/mkl_spgemm_lp \
+                -Wl,--start-group ${MKL_LIBRARY_DIR}/libmkl_intel_lp64.a \
+                ${MKL_LIBRARY_DIR}/libmkl_core.a ${MKL_LIBRARY_DIR}/libmkl_gnu_thread.a -Wl,--end-group \
+                -fopenmp -lpthread -lm -ldl
+
+# Remove every build output. Declared phony so that a stray file or directory
+# named "clean" can never make this target look up to date.
+.PHONY: clean
+clean:
+	rm -rf ${BUILD_DIR}
diff --git a/evaluation/FormatCustomization/PageRank/Intel_MKL/build/mkl_pagerank b/evaluation/FormatCustomization/PageRank/Intel_MKL/build/mkl_pagerank
diff --git a/evaluation/FormatCustomization/PageRank/Intel_MKL/mkl_pagerank.cpp b/evaluation/FormatCustomization/PageRank/Intel_MKL/mkl_pagerank.cpp
@@ -0,0 +1,136 @@
+#include <string>
+#include <iostream>
+#include <chrono>
+#include <random>
+#include <cstdlib>
+#include <numeric>
+
+#include "mtx_read.h"
+#include "mkl_spblas.h"
+
+using namespace std;
+
+//#define VAR var
+typedef double scalar_t;
+/**
+ * Repeatedly multiplies AdjMatrix by a vector (feeding each result back in as
+ * the next input) until the largest per-entry change between two consecutive
+ * results is at most 1e-8, and reports how long the iterations took.
+ *
+ * The input vector starts out uniform (1 / num_src_vertices per entry). After
+ * each product the entry sum is printed if it drifts more than 1e-2 away from
+ * 1, and the current maximum change is printed unconditionally.
+ *
+ * @param AdjMatrix         Pointer to the MKL sparse matrix handle to multiply with.
+ * @param descrAdjMatrix    MKL matrix descriptor forwarded to mkl_sparse_d_mv.
+ * @param num_src_vertices  Length of the input vector.
+ * @param num_dst_vertices  Length of the output vector.
+ * @return Average wall-clock time per iteration in seconds, or 0 when the
+ *         work buffers cannot be allocated.
+ */
+float test_spmv(sparse_matrix_t* AdjMatrix, struct matrix_descr descrAdjMatrix,
+                int num_src_vertices, int num_dst_vertices) {
+    scalar_t* Vector = (scalar_t*)malloc(sizeof(scalar_t) * num_src_vertices);
+    scalar_t* Out = (scalar_t*)malloc(sizeof(scalar_t) * num_dst_vertices);
+    scalar_t* PrevOut = (scalar_t*)malloc(sizeof(scalar_t) * num_dst_vertices);
+    // malloc(0) may legitimately return NULL, so only non-empty buffers count
+    // as allocation failures.
+    if ((num_src_vertices > 0 && Vector == NULL) ||
+        (num_dst_vertices > 0 && (Out == NULL || PrevOut == NULL))) {
+        cerr << "test_spmv: failed to allocate work buffers" << endl;
+        free(Vector);
+        free(Out);
+        free(PrevOut);
+        return 0.0f;
+    }
+
+    for (int i = 0; i < num_src_vertices; i++) {
+        Vector[i] = 1.0/num_src_vertices;
+    }
+    for (int i = 0; i < num_dst_vertices; i++) {
+        Out[i] = 0.0;
+        PrevOut[i] = 0.0;
+    }
+
+    // Only this many entries of Out fit back into Vector; for a square matrix
+    // this is simply the vertex count.
+    const int num_feedback =
+        num_src_vertices < num_dst_vertices ? num_src_vertices : num_dst_vertices;
+
+    scalar_t alpha = 1.0;
+    scalar_t beta = 0;
+    int num_runs = 0;
+    scalar_t sum = 0;
+    scalar_t max_diff = 0;
+
+    auto t1 = std::chrono::high_resolution_clock::now();
+    do {
+        const sparse_status_t status =
+            mkl_sparse_d_mv(SPARSE_OPERATION_NON_TRANSPOSE,
+                            alpha,
+                            *AdjMatrix,
+                            descrAdjMatrix,
+                            Vector,
+                            beta,
+                            Out);
+        // Count the attempt first so the average below never divides by zero.
+        num_runs ++;
+        if (status != SPARSE_STATUS_SUCCESS) {
+            cerr << "mkl_sparse_d_mv failed with status " << status << endl;
+            break;
+        }
+
+        max_diff = 0;
+        sum = 0;
+        for (int j = 0; j < num_dst_vertices; j++) {
+            // Absolute difference spelled out by hand: no dependence on which
+            // abs()/max() overloads the included headers happen to provide.
+            scalar_t diff = Out[j] - PrevOut[j];
+            if (diff < 0) {
+                diff = -diff;
+            }
+            if (diff > max_diff) {
+                max_diff = diff;
+            }
+            PrevOut[j] = Out[j];
+            if (j < num_feedback) {
+                Vector[j] = Out[j];
+            }
+            sum = Out[j] + sum;
+        }
+        // Report the entry sum when it is more than 1e-2 away from 1.
+        if (sum - 1 > 1e-2 || 1 - sum > 1e-2)
+            cout << sum<<endl;
+        cout << "max_diff = " << max_diff << endl;
+    } while (max_diff > 1e-8);
+    cout << "num_runs = " << num_runs << endl;
+    auto t2 = std::chrono::high_resolution_clock::now();
+    float total_time = float(std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count()) / 1000000;
+    cout << "total time: " << total_time << endl;
+    float average_time_in_sec = total_time / num_runs;
+
+    free(Vector);
+    free(Out);
+    free(PrevOut);
+    return average_time_in_sec;
+}
+
+
+/**
+ * Benchmark entry point: loads the matrix file named by argv[1] in CSR form,
+ * wraps it in an MKL sparse handle, runs test_spmv on it and prints the
+ * average iteration time and the resulting throughput.
+ *
+ * @return 0 on success, 1 when the file argument is missing or the MKL handle
+ *         cannot be created.
+ */
+int main(int argc, char* argv[]) {
+    if (argc < 2) {
+        std::cerr << "usage: " << (argc > 0 ? argv[0] : "mkl_pagerank")
+                  << " <matrix-file>" << std::endl;
+        return 1;
+    }
+    char *file_name = argv[1];
+/*
+     parse_CSC<double> input(file_name);
+
+     int num_dst_vertices = input.num_cols;
+     int num_src_vertices = input.num_rows;
+
+     sparse_matrix_t AdjMatrix;
+     mkl_sparse_d_create_csc(&AdjMatrix,
+                             SPARSE_INDEX_BASE_ONE,
+                             input.num_rows,
+                             input.num_cols,
+                             input.cscColPtr,
+                             input.cscColPtr + 1,
+                             input.cscRowInd,
+                             input.cscValue);
+*/
+    parse_CSR<scalar_t> input(file_name);
+
+    int num_dst_vertices = input.num_rows;
+    int num_src_vertices = input.num_cols;
+
+    // The handle references the parser's arrays (row start = csrRowPtr,
+    // row end = csrRowPtr + 1), so `input` must outlive AdjMatrix.
+    sparse_matrix_t AdjMatrix;
+    const sparse_status_t create_status =
+        mkl_sparse_d_create_csr(&AdjMatrix,
+                                SPARSE_INDEX_BASE_ZERO,
+                                input.num_rows,
+                                input.num_cols,
+                                input.csrRowPtr,
+                                input.csrRowPtr + 1,
+                                input.csrColInd,
+                                input.csrValue);
+    if (create_status != SPARSE_STATUS_SUCCESS) {
+        std::cerr << "mkl_sparse_d_create_csr failed with status "
+                  << create_status << std::endl;
+        return 1;
+    }
+
+
+    // printf("cscColPtr: \n");
+    // for (unsigned i = 0; i < input.num_cols + 1; i++) {
+    //     printf("%d  ", *(input.cscColPtr + i));
+    // }
+    // printf("\n");
+
+//    parse_COO<double> input(file_name);
+
+//    int num_dst_vertices = input.num_rows;
+//    int num_src_vertices = input.num_cols;
+
+//    sparse_matrix_t AdjMatrix;
+//    mkl_sparse_d_create_coo(&AdjMatrix,
+//                            SPARSE_INDEX_BASE_ONE,
+//                            input.num_rows,
+//                            input.num_cols,
+//                            input.num_nnz,
+//                            input.cooRowInd,
+//                            input.cooColInd,
+//                            input.cooValue);
+
+    mkl_sparse_optimize(AdjMatrix);
+
+    struct matrix_descr descrAdjMatrix;
+    descrAdjMatrix.type = SPARSE_MATRIX_TYPE_GENERAL;
+
+    float average_time_in_sec = test_spmv(&AdjMatrix, descrAdjMatrix, num_src_vertices, num_dst_vertices);
+    std::cout << "average_time = " << average_time_in_sec * 1000 << " ms" << std::endl;
+    // Two operations (multiply + add) per stored entry. Computed in floating
+    // point so a large entry count cannot overflow integer arithmetic.
+    float throughput = float(2.0 * input.num_nnz / average_time_in_sec / 1000 / 1000 / 1000);
+    std::cout << "THROUGHPUT = " << throughput << " GOPS" << std::endl;
+
+    // Release the MKL handle before `input` (which owns the arrays) goes away.
+    mkl_sparse_destroy(AdjMatrix);
+
+    return 0;
+}
diff --git a/evaluation/FormatCustomization/PageRank/Intel_MKL/mkl_spgemm.cpp b/evaluation/FormatCustomization/PageRank/Intel_MKL/mkl_spgemm.cpp
@@ -0,0 +1,107 @@
+#include <string>
+#include <iostream>
+#include <chrono>
+#include <random>
+#include <vector>
+
+#include "mtx_read.h"
+#include "mkl_spblas.h"
+#include "mkl.h"
+
+typedef double scalar_t;
+
+
+// Walks a zero-based CSR structure and, for every stored entry (i, k), adds
+// twice the number of stored entries in row k to a running total, which is
+// returned. Presumably this is the multiply-add count of multiplying the
+// matrix by itself -- confirm against the intended use.
+// `cols` is accepted but not used.
+int test_ops(MKL_INT rows, MKL_INT cols, MKL_INT *row_ptr, MKL_INT *col_idx) {
+    int op_count = 0;
+    int row = 0;
+    while (row < rows) {
+        int pos = row_ptr[row];
+        while (pos < row_ptr[row + 1]) {
+            const int k = col_idx[pos];
+            // Row k's entry count is the gap between its two row pointers.
+            op_count += 2 * (row_ptr[k + 1] - row_ptr[k]);
+            ++pos;
+        }
+        ++row;
+    }
+    return op_count;
+}
+
+/**
+ * Benchmark entry point: loads the matrix file named by argv[1] twice as CSR,
+ * multiplies the two copies with mkl_sparse_spmm `num_runs` times and prints
+ * the total and average time.
+ *
+ * Each product handle is released with mkl_sparse_destroy. The arrays
+ * returned by mkl_sparse_d_export_csr point into that handle, so they must
+ * not be freed separately.
+ *
+ * @return 0 on success, 1 when the file argument is missing or an MKL call
+ *         fails.
+ */
+int main(int argc, char* argv[]) {
+    if (argc < 2) {
+        std::cerr << "usage: " << (argc > 0 ? argv[0] : "mkl_spgemm")
+                  << " <matrix-file>" << std::endl;
+        return 1;
+    }
+    char *file_name = argv[1];
+
+/*
+     parse_CSC<double> input(file_name);
+
+     int num_dst_vertices = input.num_cols;
+     int num_src_vertices = input.num_rows;
+
+     sparse_matrix_t AdjMatrix;
+     mkl_sparse_d_create_csc(&AdjMatrix,
+                             SPARSE_INDEX_BASE_ONE,
+                             input.num_rows,
+                             input.num_cols,
+                             input.cscColPtr,
+                             input.cscColPtr + 1,
+                             input.cscRowInd,
+                             input.cscValue);
+*/
+    parse_CSR<scalar_t> input0(file_name);
+    sparse_matrix_t matA;
+    if (mkl_sparse_d_create_csr(&matA,
+                                SPARSE_INDEX_BASE_ZERO,
+                                input0.num_rows,
+                                input0.num_cols,
+                                input0.csrRowPtr,
+                                input0.csrRowPtr + 1,
+                                input0.csrColInd,
+                                input0.csrValue) != SPARSE_STATUS_SUCCESS) {
+        std::cerr << "mkl_sparse_d_create_csr failed for the left operand" << std::endl;
+        return 1;
+    }
+    mkl_sparse_optimize(matA);
+
+    parse_CSR<scalar_t> input1(file_name);
+    sparse_matrix_t matB;
+    if (mkl_sparse_d_create_csr(&matB,
+                                SPARSE_INDEX_BASE_ZERO,
+                                input1.num_rows,
+                                input1.num_cols,
+                                input1.csrRowPtr,
+                                input1.csrRowPtr + 1,
+                                input1.csrColInd,
+                                input1.csrValue) != SPARSE_STATUS_SUCCESS) {
+        std::cerr << "mkl_sparse_d_create_csr failed for the right operand" << std::endl;
+        mkl_sparse_destroy(matA);
+        return 1;
+    }
+    mkl_sparse_optimize(matB);
+
+    // NOTE(review): `var` is not defined in this file; presumably it is
+    // supplied at build time (e.g. -Dvar=<runs> via CXXFLAGS) or substituted
+    // by a script -- confirm.
+    int num_runs = var;
+
+    auto t1 = std::chrono::high_resolution_clock::now();
+    for (int i = 0; i < num_runs; i++) {
+        sparse_matrix_t matC = NULL;
+        if (mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE, matA, matB, &matC) != SPARSE_STATUS_SUCCESS) {
+            std::cerr << "mkl_sparse_spmm failed in run " << i << std::endl;
+            mkl_sparse_destroy(matA);
+            mkl_sparse_destroy(matB);
+            return 1;
+        }
+        sparse_index_base_t indexing;
+        MKL_INT rows;
+        MKL_INT cols;
+        MKL_INT *pointerB_C;
+        MKL_INT *pointerE_C;
+        MKL_INT *columns_C;
+        scalar_t *values_C;
+        mkl_sparse_d_export_csr(matC, &indexing, &rows, &cols, &pointerB_C, &pointerE_C, &columns_C, &values_C);
+        // The exported pointers alias storage owned by matC; destroying the
+        // handle releases all of it in one documented call.
+        mkl_sparse_destroy(matC);
+        mkl_free_buffers();
+    }
+    auto t2 = std::chrono::high_resolution_clock::now();
+    float total_time = float(std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count()) / 1000000;
+    printf("total time: %fs\n", total_time);
+    float average_time_in_sec = total_time / num_runs;
+    std::cout << "average_time = " << average_time_in_sec * 1000 << " ms" << std::endl;
+ //   int ops = test_ops(input0.num_rows, input0.num_cols, input0.csrRowPtr, input0.csrColInd);
+ //   std::cout << "The tested ops is " << ops << std::endl;
+ //   std::cout << "output nnz is " << pointerB_C[rows] << std::endl;
+
+//    for(int i = 0; i < rows+1; i++) {
+//        std::cout << "intput row_pointer[" << i << "] is " << input0.csrRowPtr[i] << std::endl;
+//        std::cout << "input col_index[" << i << "] is " << input0.csrColInd[i] << std::endl;
+//        std::cout << "row_pointer[" << i << "] is " << pointerB_C[i] << std::endl;
+//        std::cout << "Column_idxp[" << i << "] is " << columns_C[i] << std::endl;
+//    }
+
+    // Release the operand handles; the CSR arrays themselves stay owned by
+    // input0 / input1.
+    mkl_sparse_destroy(matA);
+    mkl_sparse_destroy(matB);
+
+    return 0;
+}