Skip to content

Commit

Permalink
clean test cases
Browse files Browse the repository at this point in the history
  • Loading branch information
Sibylau committed May 9, 2024
1 parent 15ddc00 commit 6bc4db7
Show file tree
Hide file tree
Showing 6 changed files with 22 additions and 317 deletions.
68 changes: 0 additions & 68 deletions test/UniSparse/KernelGen/HLS/codegen.mlir

This file was deleted.

Binary file removed test/UniSparse/KernelGen/HLS/csr_spmv
Binary file not shown.
Binary file removed test/UniSparse/KernelGen/HLS/csr_spmv.o
Binary file not shown.
206 changes: 0 additions & 206 deletions test/UniSparse/KernelGen/HLS/device.cpp

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
compressMap = #unisparse.compress<trim(0,1)>
}>

#CSR = #unisparse.encoding<{
crdMap = #unisparse.crd<(i,j)->(i,j)>,
compressMap = #unisparse.compress<fuse(0), trim(1,1)>
#DIA = #unisparse.encoding<{
crdMap = #unisparse.crd<(i,j)->(j-i,i)>,
compressMap = #unisparse.compress<trim(0)>
}>

#trait1 = {
Expand All @@ -36,13 +36,13 @@ module {
func.func private @rtclock() -> f64
func.func private @getTensorFilename(index) -> (!Filename)

// func.func @kernel_csr_spmv(%arg0: tensor<?x?xf32, #CSR>, %arg1: tensor<?xf32>, %argx: tensor<?xf32>) -> tensor<?xf32> {
// func.func @kernel_csr_spmv(%arg0: tensor<?x?xf32, #DIA>, %arg1: tensor<?xf32>, %argx: tensor<?xf32>) -> tensor<?xf32> {
// %0 = linalg.generic #trait1
// ins(%arg0, %arg1 : tensor<?x?xf32, #CSR>, tensor<?xf32>)
// ins(%arg0, %arg1 : tensor<?x?xf32, #DIA>, tensor<?xf32>)
// outs(%argx: tensor<?xf32>) {
// ^bb0(%a: f32, %x: f32, %o: f32):
// %2 = arith.mulf %a, %x : f32
// %3 = arith.addf %o, %2 : f32
// ^bb0(%a: f32, %b: f32, %x: f32):
// %2 = arith.mulf %a, %b : f32
// %3 = arith.addf %x, %2 : f32
// linalg.yield %3 : f32
// } -> tensor<?xf32>
// return %0 : tensor<?xf32>
Expand All @@ -60,7 +60,7 @@ module {
%t_start0 = call @rtclock() : () -> f64
%A_0 = unisparse.fromFile (%fileName) : !Filename to tensor<?x?xf32, #COO>
%c256 = tensor.dim %A_0, %c1 : tensor<?x?xf32, #COO>
%a = unisparse.convert (%A_0): tensor<?x?xf32, #COO> to tensor<?x?xf32, #CSR>
%a0 = unisparse.convert (%A_0): tensor<?x?xf32, #COO> to tensor<?x?xf32, #DIA>
%t_end0 = call @rtclock() : () -> f64
%t_0 = arith.subf %t_end0, %t_start0: f64
vector.print %t_0 : f64
Expand All @@ -70,54 +70,33 @@ module {
%ts_dim_i = arith.index_cast %c256 : index to i32
%ts_dim = arith.sitofp %ts_dim_i : i32 to f32
%elm = arith.divf %i1, %ts_dim : f32
%x = scf.for %i = %c0 to %c256 step %c1 iter_args(%t = %init_256_4) -> tensor<?xf32> {
// %k0 = arith.muli %i, %c1 : index
// %k1 = arith.index_cast %k0 : index to i32
// %k1 = arith.index_cast %i : index to i32
// %k = arith.sitofp %k1 : i32 to f32
%b = scf.for %i = %c0 to %c256 step %c1 iter_args(%t = %init_256_4) -> tensor<?xf32> {
%t3 = tensor.insert %elm into %t[%i] : tensor<?xf32>
scf.yield %t3 : tensor<?xf32>
}

%y_4_4 = bufferization.alloc_tensor(%c256) : tensor<?xf32>
%y = scf.for %i = %c0 to %c256 step %c1 iter_args(%t = %y_4_4) -> tensor<?xf32> {
%o0_4_4 = bufferization.alloc_tensor(%c256) : tensor<?xf32>
%o0 = scf.for %i = %c0 to %c256 step %c1 iter_args(%t = %o0_4_4) -> tensor<?xf32> {
%t3 = tensor.insert %i0 into %t[%i] : tensor<?xf32>
scf.yield %t3 : tensor<?xf32>
}

// %t_start4 = call @rtclock() : () -> f64
%0 = unisparse.device (%a, %x, %y) {target = "HLS"}:
(tensor<?x?xf32, #CSR>, tensor<?xf32>, tensor<?xf32>) to tensor<?xf32> {
// // read in
// %a_d0_ptr = unisparse.read_ptr (%a) : tensor<?x?xf32> -> tensor<?xf32>
// %a_d1_crd = unisparse.read_crd (%a) : tensor<?x?xf32> -> tensor<?xf32>
// %a_val = unisparse.read_val (%a) : tensor<?x?xf32> -> tensor<?xf32>
// %x_val = unisparse.read_val (%x) : tensor<?xf32> -> tensor<?xf32>
// // decode sparse matrix indices
// %a_d0_crd = unisparse.repeater (%a_d0_ptr) : tensor<?xf32> -> tensor<?xf32> // subject to change
// %a_ori_d0, %a_ori_d1 = unisparse.index_calc (%a_d0_crd, %a_d1_crd) : tensor<?xf32>, tensor<?xf32> -> tensor<?xf32>, tensor<?xf32>
// // compute
// %y_d0, %psum = unisparse.PE_mul (%a_ori_d0, %a_ori_d1, %a_val, %x_val) :
// tensor<?xf32>, tensor<?xf32>, tensor<?xf32>, tensor<?xf32> -> tensor<?xf32>, tensor<?xf32>
// %y = unisparse.PE_accum (%y_d0, %psum):
// tensor<?xf32>, tensor<?xf32> -> tensor<?xf32>
// // write out
// unisparse.write_val (%y) : tensor<?xf32>
// // generate top
// unisparse.top (%a, %x, %y) : tensor<?x?xf32, #CSR>, tensor<?xf32>, tensor<?xf32>
// // unisparse.terminator
}
// %0 = call @kernel_csr_spmv(%a, %x, %y) : (tensor<?x?xf32, #CSR>, tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
// %t_end4 = call @rtclock() : () -> f64
%t_4 = arith.subf %t_end4, %t_start4: f64
vector.print %t_4 : f64
%0 = unisparse.device (%a0, %b, %o0) {target = "HLS"} {
%0 = linalg.generic #trait1
ins(%a0, %b : tensor<?x?xf32, #DIA>, tensor<?xf32>)
outs(%o0: tensor<?xf32>) {
^bb0(%a: f32, %b1: f32, %x: f32):
%2 = arith.mulf %a, %b1 : f32
%3 = arith.addf %x, %2 : f32
linalg.yield %3 : f32
} -> tensor<?xf32>
}: tensor<?x?xf32, #DIA>, tensor<?xf32>, tensor<?xf32> -> tensor<?xf32>
%v0 = vector.transfer_read %0[%c0], %i0: tensor<?xf32>, vector<4xf32>
vector.print %v0 : vector<4xf32>

//Release the resources
bufferization.dealloc_tensor %A_0 : tensor<?x?xf32, #COO>
// bufferization.dealloc_tensor %init_256_4 : tensor<?xf32>
// bufferization.dealloc_tensor %y_4_4 : tensor<?xf32>
return
}
}

0 comments on commit 6bc4db7

Please sign in to comment.