[Datatype] support both F32 and F64

cornell-zhang · Nov 7, 2023 · 4f79c47 · 4f79c47
1 parent c497389
commit 4f79c47
Show file tree

Hide file tree

Showing 31 changed files with 3,216 additions and 2,317 deletions.
diff --git a/lib/Runtime/UniSparseTempLibs.cpp b/lib/Runtime/UniSparseTempLibs.cpp
diff --git a/lib/Runtime/UniSparseUtils.cpp b/lib/Runtime/UniSparseUtils.cpp
diff --git a/lib/Transforms/LowerFormatConversionPass.cpp b/lib/Transforms/LowerFormatConversionPass.cpp
diff --git a/test/UniSparse/Integrate/CPU/csc_spmm b/test/UniSparse/Integrate/CPU/csc_spmm
diff --git a/test/UniSparse/Integrate/CPU/csc_spmv b/test/UniSparse/Integrate/CPU/csc_spmv
diff --git a/test/UniSparse/Integrate/CPU/csr_spmm b/test/UniSparse/Integrate/CPU/csr_spmm
diff --git a/test/UniSparse/Integrate/CPU/csr_spmv b/test/UniSparse/Integrate/CPU/csr_spmv
diff --git a/test/UniSparse/Integrate/CPU/dcsc_spmm b/test/UniSparse/Integrate/CPU/dcsc_spmm
diff --git a/test/UniSparse/Integrate/CPU/dcsc_spmv b/test/UniSparse/Integrate/CPU/dcsc_spmv
diff --git a/test/UniSparse/Integrate/CPU/dcsr_spmm b/test/UniSparse/Integrate/CPU/dcsr_spmm
diff --git a/test/UniSparse/Integrate/CPU/dcsr_spmv b/test/UniSparse/Integrate/CPU/dcsr_spmv
diff --git a/...rse/Integrate/CPU/unisparse_coo_spmm.mlir → ...Integrate/CPU/unisparse_coo_spmm_F32.mlir b/...rse/Integrate/CPU/unisparse_coo_spmm.mlir → ...Integrate/CPU/unisparse_coo_spmm_F32.mlir
@@ -1,14 +1,14 @@
-// unisparse-opt ./unisparse_coo_spmm.mlir -unisparse-codegen -lower-format-conversion -lower-struct -dce | \
+// unisparse-opt ./unisparse_coo_spmm_F32.mlir -unisparse-codegen -lower-format-conversion -lower-struct -dce | \
 // mlir-opt -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" \
 // -finalizing-bufferize -convert-linalg-to-loops -convert-vector-to-scf -convert-scf-to-cf -lower-affine \
 // -convert-vector-to-llvm -convert-memref-to-llvm -convert-complex-to-standard -convert-math-to-llvm \
 // -convert-math-to-libm -convert-complex-to-libm -convert-complex-to-llvm -convert-func-to-llvm \
-// -reconcile-unrealized-casts  | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -relocation-model=pic -filetype=obj -o coo_spmm.o
+// -reconcile-unrealized-casts  | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -relocation-model=pic -filetype=obj -o coo_spmm_F32.o
 
-// clang++ coo_spmm.o -L$SPLHOME/build/lib -lmlir_unisparse_runner_utils \
-//         -L$LLVMHOME/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o coo_spmm
+// clang++ coo_spmm_F32.o -L$SPLHOME/build/lib -lmlir_unisparse_runner_utils \
+//         -L$LLVMHOME/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o coo_spmm_F32
 
-// ./coo_spmm
+// ./coo_spmm_F32
 
 !Filename = !llvm.ptr<i8>
 

diff --git a/test/UniSparse/Integrate/CPU/unisparse_coo_spmm_F64.mlir b/test/UniSparse/Integrate/CPU/unisparse_coo_spmm_F64.mlir
@@ -0,0 +1,86 @@
+// unisparse-opt ./unisparse_coo_spmm_F64.mlir -unisparse-codegen -lower-format-conversion -lower-struct -dce | \
+// mlir-opt -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" \
+// -finalizing-bufferize -convert-linalg-to-loops -convert-vector-to-scf -convert-scf-to-cf -lower-affine \
+// -convert-vector-to-llvm -convert-memref-to-llvm -convert-complex-to-standard -convert-math-to-llvm \
+// -convert-math-to-libm -convert-complex-to-libm -convert-complex-to-llvm -convert-func-to-llvm \
+// -reconcile-unrealized-casts  | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -relocation-model=pic -filetype=obj -o coo_spmm_F64.o
+
+// clang++ coo_spmm_F64.o -L$SPLHOME/build/lib -lmlir_unisparse_runner_utils \
+//         -L$LLVMHOME/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o coo_spmm_F64
+
+// ./coo_spmm_F64
+
+// Handle type used for the matrix file name passed between the runtime helpers.
+!Filename = !llvm.ptr<i8>
+
+// COO encoding: identity coordinate map (i,j)->(i,j) with trim(0,1) compression.
+#COO = #unisparse.encoding<{
+  crdMap = #unisparse.crd<(i,j)->(i,j)>,
+  compressMap = #unisparse.compress<trim(0,1)>
+}>
+
+// linalg.generic trait describing a matrix-matrix product with k as the
+// reduction dimension. The module in this file calls unisparse.coo_spmm
+// directly and does not reference this trait.
+#trait1 = {
+  indexing_maps = [
+    affine_map<(i,j,k) -> (i, k)>,  // A
+    affine_map<(i,j,k) -> (k, j)>,  // B
+    affine_map<(i,j,k) -> (i, j)>   // X (out)
+  ],
+  iterator_types = ["parallel", "parallel", "reduction"],
+  doc = "X(i,j) += A(i,k) * B(k,j)"
+}
+
+module {
+  // Runtime helpers defined outside this file; only their signatures are declared here.
+  func.func private @rtclock() -> f64
+  func.func private @getTensorFilename(index) -> (!Filename)
+  func.func private @getTensorDim(!Filename, index) -> (index)
+
+  // Driver: reads a sparse f64 matrix A in #COO form, builds a dense matrix B,
+  // zero-fills the output, times unisparse.coo_spmm between two rtclock calls,
+  // and prints the elapsed time followed by the leading 4x4 blocks of B and of
+  // the result.
+  //CHECK-LABEL: func.func @main
+  func.func @main() {
+    %i0 = arith.constant 0.0 : f64
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    // NOTE: despite its name, %c4 is 1000; it is the second extent of B and of the output.
+    %c4 = arith.constant 1000 : index
+    %fileName = call @getTensorFilename(%c0) : (index) -> (!Filename)
+
+    %A_0 = unisparse.fromFile (%fileName) : !Filename to tensor<?x?xf64, #COO>
+    // The extents are queried from the file rather than from the tensor
+    // (the tensor.dim variant is kept below, disabled).
+    %dim0 = call @getTensorDim(%fileName, %c0) : (!Filename, index) -> (index)
+    %dim1 = call @getTensorDim(%fileName, %c1) : (!Filename, index) -> (index)
+    // %dim0 = tensor.dim %A_0, %c0 : tensor<?x?xf64, #COO>
+    // %dim1 = tensor.dim %A_0, %c1 : tensor<?x?xf64, #COO>
+
+    // Initialize the dense input matrix B (%dim1 x %c4): B[i, j] = i * 1000 + j.
+    // The buffer is written in place, so the loops carry no iter_args.
+    // NOTE(review): the value passes through i32, so it wraps once i * 1000 + j
+    // no longer fits in 32 bits.
+    %init_256_4 = memref.alloc(%dim1, %c4) : memref<?x?xf64>
+    scf.for %i = %c0 to %dim1 step %c1 {
+      scf.for %j = %c0 to %c4 step %c1 {
+        %k0 = arith.muli %i, %c4 : index
+        %k1 = arith.addi %j, %k0 : index
+        %k2 = arith.index_cast %k1 : index to i32
+        %k = arith.sitofp %k2 : i32 to f64
+        memref.store %k, %init_256_4[%i, %j] : memref<?x?xf64>
+      }
+    }
+
+    // Zero-fill the output matrix (%dim0 x %c4).
+    %o0_4_4 = memref.alloc(%dim0, %c4) : memref<?x?xf64>
+    scf.for %i = %c0 to %dim0 step %c1 {
+      scf.for %j = %c0 to %c4 step %c1 {
+        memref.store %i0, %o0_4_4[%i, %j] : memref<?x?xf64>
+      }
+    }
+
+    // Timed region: only the SpMM kernel sits between the two clock reads.
+    %t_start4 = call @rtclock() : () -> f64
+    %0 = unisparse.coo_spmm %A_0, %init_256_4, %o0_4_4: tensor<?x?xf64, #COO>, memref<?x?xf64>, memref<?x?xf64> to memref<?x?xf64>
+    %t_end4 = call @rtclock() : () -> f64
+    %t_4 = arith.subf %t_end4, %t_start4: f64
+    vector.print %t_4 : f64
+    // Print the leading 4x4 block of B, then of the result; %i0 is the padding value.
+    %v1 = vector.transfer_read %init_256_4[%c0, %c0], %i0: memref<?x?xf64>, vector<4x4xf64>
+    vector.print %v1 : vector<4x4xf64>
+    %v0 = vector.transfer_read %0[%c0, %c0], %i0: memref<?x?xf64>, vector<4x4xf64>
+    vector.print %v0 : vector<4x4xf64>
+
+    // Release the resources.
+    bufferization.dealloc_tensor %A_0 : tensor<?x?xf64, #COO>
+    // The dense buffers come from memref.alloc and are freed explicitly after
+    // the last read. %0 is not freed separately.
+    // NOTE(review): %0 presumably aliases %o0_4_4 — confirm against the op's lowering.
+    memref.dealloc %init_256_4 : memref<?x?xf64>
+    memref.dealloc %o0_4_4 : memref<?x?xf64>
+    return
+  }
+}
diff --git a/...rse/Integrate/CPU/unisparse_coo_spmv.mlir → ...Integrate/CPU/unisparse_coo_spmv_F32.mlir b/...rse/Integrate/CPU/unisparse_coo_spmv.mlir → ...Integrate/CPU/unisparse_coo_spmv_F32.mlir
@@ -1,14 +1,14 @@
-// unisparse-opt ./unisparse_coo_spmv.mlir -unisparse-codegen -lower-format-conversion -lower-struct -dce | \
+// unisparse-opt ./unisparse_coo_spmv_F32.mlir -unisparse-codegen -lower-format-conversion -lower-struct -dce | \
 // mlir-opt -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" \
 // -finalizing-bufferize -convert-linalg-to-loops -convert-vector-to-scf -convert-scf-to-cf -lower-affine \
 // -convert-vector-to-llvm -convert-memref-to-llvm -convert-complex-to-standard -convert-math-to-llvm \
 // -convert-math-to-libm -convert-complex-to-libm -convert-complex-to-llvm -convert-func-to-llvm \
-// -reconcile-unrealized-casts  | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -relocation-model=pic -filetype=obj -o coo_spmv.o
+// -reconcile-unrealized-casts  | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -relocation-model=pic -filetype=obj -o coo_spmv_F32.o
 
-// clang++ coo_spmv.o -L$SPLHOME/build/lib -lmlir_unisparse_runner_utils \
-//         -L$LLVMHOME/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o coo_spmv
+// clang++ coo_spmv_F32.o -L$SPLHOME/build/lib -lmlir_unisparse_runner_utils \
+//         -L$LLVMHOME/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o coo_spmv_F32
 
-// ./coo_spmv
+// ./coo_spmv_F32
 
 !Filename = !llvm.ptr<i8>
 

diff --git a/test/UniSparse/Integrate/CPU/unisparse_coo_spmv_F64.mlir b/test/UniSparse/Integrate/CPU/unisparse_coo_spmv_F64.mlir
@@ -0,0 +1,84 @@
+// unisparse-opt ./unisparse_coo_spmv_F64.mlir -unisparse-codegen -lower-format-conversion -lower-struct -dce | \
+// mlir-opt -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" \
+// -finalizing-bufferize -convert-linalg-to-loops -convert-vector-to-scf -convert-scf-to-cf -lower-affine \
+// -convert-vector-to-llvm -convert-memref-to-llvm -convert-complex-to-standard -convert-math-to-llvm \
+// -convert-math-to-libm -convert-complex-to-libm -convert-complex-to-llvm -convert-func-to-llvm \
+// -reconcile-unrealized-casts  | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -relocation-model=pic -filetype=obj -o coo_spmv_F64.o
+
+// clang++ coo_spmv_F64.o -L$SPLHOME/build/lib -lmlir_unisparse_runner_utils \
+//         -L$LLVMHOME/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o coo_spmv_F64
+
+// ./coo_spmv_F64
+
+// Handle type used for the matrix file name passed between the runtime helpers.
+!Filename = !llvm.ptr<i8>
+
+// COO encoding: identity coordinate map (i,j)->(i,j) with trim(0,1) compression.
+#COO = #unisparse.encoding<{
+  crdMap = #unisparse.crd<(i,j)->(i,j)>,
+  compressMap = #unisparse.compress<trim(0,1)>
+}>
+
+// linalg.generic trait describing a matrix-vector product with j as the
+// reduction dimension. The module in this file calls unisparse.coo_spmv
+// directly and does not reference this trait.
+#trait1 = {
+  indexing_maps = [
+    affine_map<(i,j) -> (i, j)>,  // A
+    affine_map<(i,j) -> (j)>,  // B
+    affine_map<(i,j) -> (i)>   // X (out)
+  ],
+  iterator_types = ["parallel", "reduction"],
+  doc = "X(i) += A(i,j) * B(j)"
+}
+
+module {
+  // Runtime helpers defined outside this file; only their signatures are declared here.
+  func.func private @rtclock() -> f64
+  func.func private @getTensorFilename(index) -> (!Filename)
+  func.func private @getTensorDim(!Filename, index) -> (index)
+  func.func private @printU64(index) -> ()
+
+  // Driver: reads a sparse f64 matrix A in #COO form, builds a dense vector,
+  // zero-fills the output vector, times unisparse.coo_spmv between two rtclock
+  // calls, and prints the elapsed time followed by the first 4 elements of the
+  // input vector and of the result.
+  //CHECK-LABEL: func.func @main
+  func.func @main() {
+    %i0 = arith.constant 0.0 : f64
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+
+    %fileName = call @getTensorFilename(%c0) : (index) -> (!Filename)
+
+    %A_0 = unisparse.fromFile (%fileName) : !Filename to tensor<?x?xf64, #COO>
+    // The extents are queried from the file rather than from the tensor
+    // (the tensor.dim variant is kept below, disabled) and are printed.
+    %dim0 = call @getTensorDim(%fileName, %c0) : (!Filename, index) -> (index)
+    %dim1 = call @getTensorDim(%fileName, %c1) : (!Filename, index) -> (index)
+    call @printU64(%dim0) : (index) -> ()
+    call @printU64(%dim1) : (index) -> ()
+    // %dim0 = tensor.dim %A_0, %c0 : tensor<?x?xf64, #COO>
+    // %dim1 = tensor.dim %A_0, %c1 : tensor<?x?xf64, #COO>
+
+    // Initialize the dense input vector (length %dim1): b[i] = i.
+    // The buffer is written in place, so the loop carries no iter_args.
+    // NOTE(review): the value passes through i32, so it wraps once i no longer
+    // fits in 32 bits.
+    %init_256_4 = memref.alloc(%dim1) : memref<?xf64>
+    scf.for %i = %c0 to %dim1 step %c1 {
+      %k1 = arith.index_cast %i : index to i32
+      %k = arith.sitofp %k1 : i32 to f64
+      memref.store %k, %init_256_4[%i] : memref<?xf64>
+    }
+
+    // Zero-fill the output vector (length %dim0).
+    %o0_4_4 = memref.alloc(%dim0) : memref<?xf64>
+    scf.for %i = %c0 to %dim0 step %c1 {
+      memref.store %i0, %o0_4_4[%i] : memref<?xf64>
+    }
+
+    // Timed region: only the SpMV kernel sits between the two clock reads.
+    %t_start4 = call @rtclock() : () -> f64
+    %0 = unisparse.coo_spmv %A_0, %init_256_4, %o0_4_4: tensor<?x?xf64, #COO>, memref<?xf64>, memref<?xf64> to memref<?xf64>
+    %t_end4 = call @rtclock() : () -> f64
+    %t_4 = arith.subf %t_end4, %t_start4: f64
+    vector.print %t_4 : f64
+    // Print the first 4 elements of the input, then of the result; %i0 is the padding value.
+    %v1 = vector.transfer_read %init_256_4[%c0], %i0: memref<?xf64>, vector<4xf64>
+    vector.print %v1 : vector<4xf64>
+    %v0 = vector.transfer_read %0[%c0], %i0: memref<?xf64>, vector<4xf64>
+    vector.print %v0 : vector<4xf64>
+
+    // Release the resources.
+    bufferization.dealloc_tensor %A_0 : tensor<?x?xf64, #COO>
+    // The dense buffers are memrefs from memref.alloc, so they are freed with
+    // memref.dealloc after the last read. %0 is not freed separately.
+    // NOTE(review): %0 presumably aliases %o0_4_4 — confirm against the op's lowering.
+    memref.dealloc %init_256_4 : memref<?xf64>
+    memref.dealloc %o0_4_4 : memref<?xf64>
+    return
+  }
+}
diff --git a/...rse/Integrate/CPU/unisparse_csc_spmm.mlir → ...Integrate/CPU/unisparse_csc_spmm_F32.mlir b/...rse/Integrate/CPU/unisparse_csc_spmm.mlir → ...Integrate/CPU/unisparse_csc_spmm_F32.mlir
@@ -1,14 +1,14 @@
-// unisparse-opt ./unisparse_csc_spmm.mlir -unisparse-codegen -lower-format-conversion -lower-struct -dce | \
+// unisparse-opt ./unisparse_csc_spmm_F32.mlir -unisparse-codegen -lower-format-conversion -lower-struct -dce | \
 // mlir-opt -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" \
 // -finalizing-bufferize -convert-linalg-to-loops -convert-vector-to-scf -convert-scf-to-cf -lower-affine \
 // -convert-vector-to-llvm -convert-memref-to-llvm -convert-complex-to-standard -convert-math-to-llvm \
 // -convert-math-to-libm -convert-complex-to-libm -convert-complex-to-llvm -convert-func-to-llvm \
-// -reconcile-unrealized-casts  | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -relocation-model=pic -filetype=obj -o spmm.o
+// -reconcile-unrealized-casts  | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -relocation-model=pic -filetype=obj -o csc_spmm_F32.o
 
-// clang++ spmm.o -L$SPLHOME/build/lib -lmlir_unisparse_runner_utils \
-//         -L$LLVMHOME/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o spmm
+// clang++ csc_spmm_F32.o -L$SPLHOME/build/lib -lmlir_unisparse_runner_utils \
+//         -L$LLVMHOME/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o csc_spmm_F32
 
-// ./spmm
+// ./csc_spmm_F32
 
 !Filename = !llvm.ptr<i8>
 

diff --git a/test/UniSparse/Integrate/CPU/unisparse_csc_spmm_F64.mlir b/test/UniSparse/Integrate/CPU/unisparse_csc_spmm_F64.mlir
@@ -0,0 +1,105 @@
+// unisparse-opt ./unisparse_csc_spmm_F64.mlir -unisparse-codegen -lower-format-conversion -lower-struct -dce | \
+// mlir-opt -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" \
+// -finalizing-bufferize -convert-linalg-to-loops -convert-vector-to-scf -convert-scf-to-cf -lower-affine \
+// -convert-vector-to-llvm -convert-memref-to-llvm -convert-complex-to-standard -convert-math-to-llvm \
+// -convert-math-to-libm -convert-complex-to-libm -convert-complex-to-llvm -convert-func-to-llvm \
+// -reconcile-unrealized-casts  | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -relocation-model=pic -filetype=obj -o csc_spmm_F64.o
+
+// clang++ csc_spmm_F64.o -L$SPLHOME/build/lib -lmlir_unisparse_runner_utils \
+//         -L$LLVMHOME/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o csc_spmm_F64
+
+// ./csc_spmm_F64
+
+// Handle type used for the matrix file name passed between the runtime helpers.
+!Filename = !llvm.ptr<i8>
+
+// COO encoding: identity coordinate map (i,j)->(i,j) with trim(0,1) compression.
+#COO = #unisparse.encoding<{
+  crdMap = #unisparse.crd<(i,j)->(i,j)>,
+  compressMap = #unisparse.compress<trim(0,1)>
+}>
+
+// CSC encoding: coordinates swapped to (j, i), with fuse(0) and trim(1,1) compression.
+#CSC = #unisparse.encoding<{
+  crdMap = #unisparse.crd<(i,j)->(j, i)>,
+  compressMap = #unisparse.compress<fuse(0), trim(1,1)>
+}>
+
+// linalg.generic trait describing a matrix-matrix product with k as the
+// reduction dimension; used by @kernel_csc_spmm.
+#trait1 = {
+  indexing_maps = [
+    affine_map<(i,j,k) -> (i, k)>,  // A
+    affine_map<(i,j,k) -> (k, j)>,  // B
+    affine_map<(i,j,k) -> (i, j)>   // X (out)
+  ],
+  iterator_types = ["parallel", "parallel", "reduction"],
+  doc = "X(i,j) += A(i,k) * B(k,j)"
+}
+
+module {
+  // Runtime helpers defined outside this file; only their signatures are declared here.
+  func.func private @rtclock() -> f64
+  func.func private @getTensorFilename(index) -> (!Filename)
+
+  // SpMM kernel: accumulates A(i,k) * B(k,j) into X(i,j) via linalg.generic with
+  // #trait1, where A carries the #CSC encoding and B and X are dense.
+  func.func @kernel_csc_spmm(%arg0: tensor<?x?xf64, #CSC>, %arg1: tensor<?x?xf64>, %argx: tensor<?x?xf64>) -> tensor<?x?xf64> {
+    %0 = linalg.generic #trait1
+    ins(%arg0, %arg1 : tensor<?x?xf64, #CSC>, tensor<?x?xf64>)
+    outs(%argx: tensor<?x?xf64>) {
+    ^bb0(%a: f64, %b: f64, %x: f64):
+      %2 = arith.mulf %a, %b : f64
+      %3 = arith.addf %x, %2 : f64
+      linalg.yield %3 : f64
+    } -> tensor<?x?xf64>
+    return %0 : tensor<?x?xf64>
+  }
+
+  // Driver: reads a sparse f64 matrix in #COO form, converts it to #CSC (timed),
+  // builds a dense B and a zeroed output, times @kernel_csc_spmm, and prints both
+  // elapsed times followed by the leading 4x4 block of the result.
+  //CHECK-LABEL: func.func @main
+  func.func @main() {
+    %i0 = arith.constant 0.0 : f64
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    // NOTE: despite its name, %c4 is 1000; it is the second extent of B and of the output.
+    %c4 = arith.constant 1000 : index
+
+    %fileName = call @getTensorFilename(%c0) : (index) -> (!Filename)
+
+    // Timed region 1: file load, extent queries and COO -> CSC conversion.
+    %t_start1 = call @rtclock() : () -> f64
+    %A_1 = unisparse.fromFile (%fileName) : !Filename to tensor<?x?xf64, #COO>
+    // %c256 is extent 1 of A (the reduction extent k); %rows is extent 0 of A,
+    // which the output's first index follows per #trait1.
+    %c256 = tensor.dim %A_1, %c1 : tensor<?x?xf64, #COO>
+    %rows = tensor.dim %A_1, %c0 : tensor<?x?xf64, #COO>
+    %a1 = unisparse.convert (%A_1): tensor<?x?xf64, #COO> to tensor<?x?xf64, #CSC>
+    %t_end1 = call @rtclock() : () -> f64
+    %t_1 = arith.subf %t_end1, %t_start1: f64
+    vector.print %t_1 : f64
+
+    // Initialize dense matrix B (%c256 x %c4): B[i, j] = i * 1000 + j.
+    // NOTE(review): the value passes through i32, so it wraps once i * 1000 + j
+    // no longer fits in 32 bits.
+    %init_256_4 = bufferization.alloc_tensor(%c256, %c4) : tensor<?x?xf64>
+    %b = scf.for %i = %c0 to %c256 step %c1 iter_args(%t = %init_256_4) -> tensor<?x?xf64> {
+      %b2 = scf.for %j = %c0 to %c4 step %c1 iter_args(%t2 = %t) -> tensor<?x?xf64> {
+        %k0 = arith.muli %i, %c4 : index
+        %k1 = arith.addi %j, %k0 : index
+        %k2 = arith.index_cast %k1 : index to i32
+        %k = arith.sitofp %k2 : i32 to f64
+        %t3 = tensor.insert %k into %t2[%i, %j] : tensor<?x?xf64>
+        scf.yield %t3 : tensor<?x?xf64>
+      }
+      scf.yield %b2 : tensor<?x?xf64>
+    }
+
+    // Zero-fill the output matrix. It is sized by extent 0 of A (%rows), not by
+    // %c256, so that non-square inputs get an output of the right shape.
+    %o1_4_4 = bufferization.alloc_tensor(%rows, %c4) : tensor<?x?xf64>
+    %o1 = scf.for %i = %c0 to %rows step %c1 iter_args(%t = %o1_4_4) -> tensor<?x?xf64> {
+      %x2 = scf.for %j = %c0 to %c4 step %c1 iter_args(%t2 = %t) -> tensor<?x?xf64> {
+        %t3 = tensor.insert %i0 into %t2[%i, %j] : tensor<?x?xf64>
+        scf.yield %t3 : tensor<?x?xf64>
+      }
+      scf.yield %x2 : tensor<?x?xf64>
+    }
+
+    // Timed region 2: only the kernel call sits between the two clock reads.
+    %t_start5 = call @rtclock() : () -> f64
+    %1 = call @kernel_csc_spmm(%a1, %b, %o1) : (tensor<?x?xf64, #CSC>, tensor<?x?xf64>, tensor<?x?xf64>) -> tensor<?x?xf64>
+    %t_end5 = call @rtclock() : () -> f64
+    %t_5 = arith.subf %t_end5, %t_start5: f64
+    vector.print %t_5 : f64
+    // Print the leading 4x4 block of the result; %i0 is the padding value.
+    %v1 = vector.transfer_read %1[%c0, %c0], %i0: tensor<?x?xf64>, vector<4x4xf64>
+    vector.print %v1 : vector<4x4xf64>
+
+    // Release the resources.
+    // NOTE(review): only the COO tensor is released; %a1 and the dense tensors
+    // are left alone, as in the original — confirm whether they need freeing.
+    bufferization.dealloc_tensor %A_1 : tensor<?x?xf64, #COO>
+//    bufferization.dealloc_tensor %init_256_4 : tensor<?x?xf64>
+//    bufferization.dealloc_tensor %o1_4_4 : tensor<?x?xf64>
+    return
+  }
+}
diff --git a/...rse/Integrate/CPU/unisparse_csc_spmv.mlir → ...Integrate/CPU/unisparse_csc_spmv_F32.mlir b/...rse/Integrate/CPU/unisparse_csc_spmv.mlir → ...Integrate/CPU/unisparse_csc_spmv_F32.mlir
@@ -1,14 +1,14 @@
-// unisparse-opt ./unisparse_csc_spmv.mlir -unisparse-codegen -lower-format-conversion -lower-struct -dce | \
+// unisparse-opt ./unisparse_csc_spmv_F32.mlir -unisparse-codegen -lower-format-conversion -lower-struct -dce | \
 // mlir-opt -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" \
 // -finalizing-bufferize -convert-linalg-to-loops -convert-vector-to-scf -convert-scf-to-cf -lower-affine \
 // -convert-vector-to-llvm -convert-memref-to-llvm -convert-complex-to-standard -convert-math-to-llvm \
 // -convert-math-to-libm -convert-complex-to-libm -convert-complex-to-llvm -convert-func-to-llvm \
-// -reconcile-unrealized-casts  | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -relocation-model=pic -filetype=obj -o spmv.o
+// -reconcile-unrealized-casts  | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -relocation-model=pic -filetype=obj -o csc_spmv_F32.o
 
-// clang++ spmv.o -L$SPLHOME/build/lib -lmlir_unisparse_runner_utils \
-//         -L$LLVMHOME/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o spmv
+// clang++ csc_spmv_F32.o -L$SPLHOME/build/lib -lmlir_unisparse_runner_utils \
+//         -L$LLVMHOME/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o csc_spmv_F32
 
-// ./spmv
+// ./csc_spmv_F32
 
 !Filename = !llvm.ptr<i8>