
Commit

script
Sibylau committed Jan 6, 2024
1 parent 80e41d3 commit 66654df
Showing 7 changed files with 288 additions and 7 deletions.
164 changes: 164 additions & 0 deletions evaluation/CPU/UniSparse/sparlay_bdia_csr_spmv.mlir
@@ -0,0 +1,164 @@
// sparlay-opt ./decompose-BDIA.mlir -lower-struct-convert -lower-struct -dce -sparlay-codegen -lower-format-conversion | \
// mlir-opt -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" \
// -finalizing-bufferize -convert-linalg-to-loops -convert-vector-to-scf -convert-scf-to-cf -lower-affine \
// -convert-vector-to-llvm -convert-memref-to-llvm -convert-complex-to-standard -convert-math-to-llvm \
// -convert-math-to-libm -convert-complex-to-libm -convert-complex-to-llvm -convert-func-to-llvm \
// -reconcile-unrealized-casts | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -relocation-model=pic -filetype=obj -o bdia_spmv.o

// clang++ bdia_spmv.o -L$SPLHOME/build/lib -lmlir_sparlay_runner_utils \
// -L$LLVMHOME/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o bdia_spmv

// ./bdia_spmv

// RUN: sparlay-opt %s -lower-struct-convert -lower-struct -dce -lower-format-conversion | FileCheck %s


!Filename = !llvm.ptr<i8>

#COO = #sparlay.encoding<{
crdMap = #sparlay.crd<(i,j)->(i,j)>,
compressMap = #sparlay.compress<trim(0,1)>
}>

#CSR = #sparlay.encoding<{
crdMap = #sparlay.crd<(i,j)->(i,j)>,
compressMap = #sparlay.compress<fuse(0), trim(1,1)>
}>

#BDIA = #sparlay.encoding<{
crdMap = #sparlay.crd<(i,j)->(i floordiv 50, j minus i, i mod 50)>,
compressMap = #sparlay.compress<fuse(0), trim(1,1)>
}>
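// Blocked-diagonal (BDIA) layout: rows are grouped into blocks of 50
// (i floordiv 50), diagonals are indexed by their offset (j minus i), and
// i mod 50 gives the row position inside a block.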

#trait1 = {
indexing_maps = [
affine_map<(i,j) -> (i, j)>, // A
affine_map<(i,j) -> (j)>, // B
affine_map<(i,j) -> (i)> // X (out)
],
iterator_types = ["parallel", "reduction"],
doc = "X(i) =+ A(i,j) * B(j)"
}

module {
func.func private @rtclock() -> f64
func.func private @getTensorFilename(index) -> (!Filename)
func.func @kernel_csr_spmv(%arg0: tensor<?x?xf32, #CSR>, %arg1: tensor<?xf32>, %argx: tensor<?xf32>) -> tensor<?xf32> {
%0 = linalg.generic #trait1
ins(%arg0, %arg1 : tensor<?x?xf32, #CSR>, tensor<?xf32>)
outs(%argx: tensor<?xf32>) {
^bb0(%a: f32, %b: f32, %x: f32):
%2 = arith.mulf %a, %b : f32
%3 = arith.addf %x, %2 : f32
linalg.yield %3 : f32
} -> tensor<?xf32>
return %0 : tensor<?xf32>
}

func.func @main() {
%c0 = arith.constant 0: index
%c1 = arith.constant 1 : index
%f0 = arith.constant 0.0: f32
%f05 = arith.constant 0.5: f32
%i1 = arith.constant 1: i32
%blockSize = arith.constant 100: i32
%thres_1 = arith.constant 0.5: f32

%fileName = call @getTensorFilename(%c0) : (index) -> (!Filename)
%A_1 = sparlay.fromFile (%fileName): !llvm.ptr<i8> to tensor<?x?xf32, #COO>
%dim1 = tensor.dim %A_1, %c1 : tensor<?x?xf32, #COO>
%dim0 = tensor.dim %A_1, %c0 : tensor<?x?xf32, #COO>
// %thres_1 = arith.constant dense<[0.5]>: tensor<1xf32>
// %thres_2 = bufferization.alloc_tensor () copy(%thres_1): tensor<1xf32>
// %thres = bufferization.to_memref %thres_2: memref<1xf32>

%t_start0 = call @rtclock() : () -> f64
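// decompose_BDIA splits the COO matrix into a BDIA part and a COO remainder,
// controlled by the block size %blockSize and the threshold %thres_1.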
%S_1 = sparlay.decompose_BDIA %A_1, %blockSize, %thres_1 : tensor<?x?xf32, #COO>, i32, f32 to
!sparlay.struct< tensor<?x?xf32,#COO>, tensor<?x?xf32,#BDIA> >
%t_end0 = call @rtclock() : () -> f64
%t_0 = arith.subf %t_end0, %t_start0: f64
vector.print %t_0 : f64

%B_0 = sparlay.struct_access %S_1[0]:
!sparlay.struct< tensor<?x?xf32,#COO>, tensor<?x?xf32,#BDIA> >
to tensor<?x?xf32, #COO>
%B_1 = sparlay.struct_access %S_1[1]:
!sparlay.struct< tensor<?x?xf32,#COO>, tensor<?x?xf32,#BDIA> >
to tensor<?x?xf32, #BDIA>

%D_0 = sparlay.convert(%B_0) : tensor<?x?xf32, #COO> to tensor<?x?xf32, #CSR>

// %init_256_4 = bufferization.alloc_tensor(%dim1) : tensor<?xf32>
// %b = scf.for %i = %c0 to %dim1 step %c1 iter_args(%t = %init_256_4) -> tensor<?xf32> {
// %k0 = arith.muli %i, %c1 : index
// %k1 = arith.index_cast %k0 : index to i32
// %k = arith.sitofp %k1 : i32 to f32
// %t3 = tensor.insert %k into %t[%i] : tensor<?xf32>
// scf.yield %t3 : tensor<?xf32>
// }
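// Allocate a dense input vector of length dim1 and fill it so that b[i] = i (cast to f32).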
%init_256_4 = bufferization.alloc_tensor(%dim1) : tensor<?xf32>
// %tensor_B = tensor.insert %f05 into %init_256_4[%c0] : tensor<?xf32>
// %dim1_1 = arith.subi %dim1, %c1 : index
// %i_dim1_1 = arith.index_cast %dim1_1 : index to i32
// %f_dim1_1 = arith.sitofp %i_dim1_1 : i32 to f32
// %elm = arith.divf %f05, %f_dim1_1 : f32
// %b = scf.for %i = %c1 to %dim1 step %c1 iter_args(%t = %tensor_B) -> tensor<?xf32> {
%b = scf.for %i = %c0 to %dim1 step %c1 iter_args(%t = %init_256_4) -> tensor<?xf32> {
%k1 = arith.index_cast %i : index to i32
%k = arith.sitofp %k1 : i32 to f32
%t3 = tensor.insert %k into %t[%i] : tensor<?xf32>
scf.yield %t3 : tensor<?xf32>
}

// %o0 = bufferization.alloc_tensor(%dim0) : tensor<?xf32>
// %o00 = scf.for %i = %c0 to %dim0 step %c1 iter_args(%t = %o0) -> tensor<?xf32> {
// %t3 = tensor.insert %f0 into %t[%i] : tensor<?xf32>
// scf.yield %t3 : tensor<?xf32>
// }
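// Allocate an output vector of length dim0 and build a zero-initialized version (%o11).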
%o1 = bufferization.alloc_tensor(%dim0) : tensor<?xf32>
%o11 = scf.for %i = %c0 to %dim0 step %c1 iter_args(%t = %o1) -> tensor<?xf32> {
%t3 = tensor.insert %f0 into %t[%i] : tensor<?xf32>
scf.yield %t3 : tensor<?xf32>
}
// %o2 = bufferization.alloc_tensor(%dim0) : tensor<?xf32>
// %o22 = scf.for %i = %c0 to %dim0 step %c1 iter_args(%t = %o2) -> tensor<?xf32> {
// %t3 = tensor.insert %f0 into %t[%i] : tensor<?xf32>
// scf.yield %t3 : tensor<?xf32>
// }

%t_start4 = call @rtclock() : () -> f64
// CSR SpMV
// %result0 = call @kernel_csr_spmv(%D_0, %b, %o00) : (tensor<?x?xf32, #CSR>, tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
// %t_end1 = call @rtclock() : () -> f64
// block DIA SpMV
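// bdia_spmv multiplies the decomposed matrix (CSR remainder %D_0 plus
// block-diagonal part %B_1) by the dense vector %b; %o1 provides the output
// buffer and the result is returned as a memref.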
%result1 = sparlay.bdia_spmv %D_0, %B_1, %b, %o1:
tensor<?x?xf32, #CSR>, tensor<?x?xf32,#BDIA>, tensor<?xf32>, tensor<?xf32> to memref<?xf32>
// %t_end2 = call @rtclock() : () -> f64
// %output = linalg.elemwise_binary ins(%result0, %result1: tensor<?xf32>, tensor<?xf32>)
// outs(%o2: tensor<?xf32>) -> tensor<?xf32>
%t_end4 = call @rtclock() : () -> f64
// %t_1 = arith.subf %t_end1, %t_start4: f64
// %t_2 = arith.subf %t_end2, %t_end1: f64
// %t_4 = arith.subf %t_end4, %t_end2: f64
%t_5 = arith.subf %t_end4, %t_start4: f64
// vector.print %t_1 : f64
// vector.print %t_2 : f64
// vector.print %t_4 : f64
vector.print %t_5 : f64
// %v0 = vector.transfer_read %result0[%c0], %f0: tensor<?xf32>, vector<4xf32>
// vector.print %v0 : vector<4xf32>
%v1 = vector.transfer_read %result1[%c0], %f0: memref<?xf32>, vector<4xf32>
vector.print %v1 : vector<4xf32>
// %v2 = vector.transfer_read %output[%c0], %f0: tensor<?xf32>, vector<4xf32>
// vector.print %v2 : vector<4xf32>
bufferization.dealloc_tensor %A_1 : tensor<?x?xf32, #COO>
bufferization.dealloc_tensor %B_1 : tensor<?x?xf32, #BDIA>
sparlay.release %S_1: !sparlay.struct< tensor<?x?xf32,#COO>, tensor<?x?xf32,#BDIA> >
// bufferization.dealloc_tensor %B_0 : tensor<?x?xf32, #COO>
// bufferization.dealloc_tensor %o1 : tensor<?xf32>
// bufferization.dealloc_tensor %result0 : tensor<?xf32>
// bufferization.dealloc_tensor %output : tensor<?xf32>

return
}
}
119 changes: 119 additions & 0 deletions evaluation/CPU/UniSparse/unisparse_bdia_csr_spmv.mlir
@@ -0,0 +1,119 @@
// sparlay-opt ./decompose-BDIA.mlir -lower-struct-convert -lower-struct -dce -sparlay-codegen -lower-format-conversion | \
// mlir-opt -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" \
// -finalizing-bufferize -convert-linalg-to-loops -convert-vector-to-scf -convert-scf-to-cf -lower-affine \
// -convert-vector-to-llvm -convert-memref-to-llvm -convert-complex-to-standard -convert-math-to-llvm \
// -convert-math-to-libm -convert-complex-to-libm -convert-complex-to-llvm -convert-func-to-llvm \
// -reconcile-unrealized-casts | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -relocation-model=pic -filetype=obj -o bdia_spmv.o

// clang++ bdia_spmv.o -L$SPLHOME/build/lib -lmlir_sparlay_runner_utils \
// -L$LLVMHOME/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o bdia_spmv

// ./bdia_spmv

// RUN: sparlay-opt %s -lower-struct-convert -lower-struct -dce -lower-format-conversion | FileCheck %s


!Filename = !llvm.ptr<i8>

#COO = #sparlay.encoding<{
crdMap = #sparlay.crd<(i,j)->(i,j)>,
compressMap = #sparlay.compress<trim(0,1)>
}>

#CSR = #sparlay.encoding<{
crdMap = #sparlay.crd<(i,j)->(i,j)>,
compressMap = #sparlay.compress<fuse(0), trim(1,1)>
}>

#BDIA = #sparlay.encoding<{
crdMap = #sparlay.crd<(i,j)->(i floordiv 50, j minus i, i mod 50)>,
compressMap = #sparlay.compress<fuse(0), trim(1,1)>
}>

#trait1 = {
indexing_maps = [
affine_map<(i,j) -> (i, j)>, // A
affine_map<(i,j) -> (j)>, // B
affine_map<(i,j) -> (i)> // X (out)
],
iterator_types = ["parallel", "reduction"],
doc = "X(i) =+ A(i,j) * B(j)"
}

module {
func.func private @rtclock() -> f64
func.func private @getTensorFilename(index) -> (!Filename)
func.func @kernel_csr_spmv(%arg0: tensor<?x?xf32, #CSR>, %arg1: tensor<?xf32>, %argx: tensor<?xf32>) -> tensor<?xf32> {
%0 = linalg.generic #trait1
ins(%arg0, %arg1 : tensor<?x?xf32, #CSR>, tensor<?xf32>)
outs(%argx: tensor<?xf32>) {
^bb0(%a: f32, %b: f32, %x: f32):
%2 = arith.mulf %a, %b : f32
%3 = arith.addf %x, %2 : f32
linalg.yield %3 : f32
} -> tensor<?xf32>
return %0 : tensor<?xf32>
}

func.func @main() {
%c0 = arith.constant 0: index
%c1 = arith.constant 1 : index
%f0 = arith.constant 0.0: f32
%f05 = arith.constant 0.5: f32
%i1 = arith.constant 1: i32
%blockSize = arith.constant 100: i32
%thres_1 = arith.constant 0.3: f32

%fileName = call @getTensorFilename(%c0) : (index) -> (!Filename)
%A_1 = sparlay.fromFile (%fileName): !llvm.ptr<i8> to tensor<?x?xf32, #COO>
%dim1 = tensor.dim %A_1, %c1 : tensor<?x?xf32, #COO>
%dim0 = tensor.dim %A_1, %c0 : tensor<?x?xf32, #COO>

%t_start0 = call @rtclock() : () -> f64
%S_1 = sparlay.decompose_BDIA %A_1, %blockSize, %thres_1 : tensor<?x?xf32, #COO>, i32, f32 to
!sparlay.struct< tensor<?x?xf32,#COO>, tensor<?x?xf32,#BDIA> >
%t_end0 = call @rtclock() : () -> f64
%t_0 = arith.subf %t_end0, %t_start0: f64
vector.print %t_0 : f64

%B_0 = sparlay.struct_access %S_1[0]:
!sparlay.struct< tensor<?x?xf32,#COO>, tensor<?x?xf32,#BDIA> >
to tensor<?x?xf32, #COO>
%B_1 = sparlay.struct_access %S_1[1]:
!sparlay.struct< tensor<?x?xf32,#COO>, tensor<?x?xf32,#BDIA> >
to tensor<?x?xf32, #BDIA>

%D_0 = sparlay.convert(%B_0) : tensor<?x?xf32, #COO> to tensor<?x?xf32, #CSR>

%init_256_4 = bufferization.alloc_tensor(%dim1) : tensor<?xf32>
%b = scf.for %i = %c0 to %dim1 step %c1 iter_args(%t = %init_256_4) -> tensor<?xf32> {
%k1 = arith.index_cast %i : index to i32
%k = arith.sitofp %k1 : i32 to f32
%t3 = tensor.insert %k into %t[%i] : tensor<?xf32>
scf.yield %t3 : tensor<?xf32>
}

%o1 = bufferization.alloc_tensor(%dim0) : tensor<?xf32>
%o11 = scf.for %i = %c0 to %dim0 step %c1 iter_args(%t = %o1) -> tensor<?xf32> {
%t3 = tensor.insert %f0 into %t[%i] : tensor<?xf32>
scf.yield %t3 : tensor<?xf32>
}

%t_start4 = call @rtclock() : () -> f64
// block DIA SpMV
%result1 = sparlay.bdia_spmv %D_0, %B_1, %b, %o1:
tensor<?x?xf32, #CSR>, tensor<?x?xf32,#BDIA>, tensor<?xf32>, tensor<?xf32> to memref<?xf32>
%t_end4 = call @rtclock() : () -> f64
%t_5 = arith.subf %t_end4, %t_start4: f64
vector.print %t_5 : f64

%v1 = vector.transfer_read %result1[%c0], %f0: memref<?xf32>, vector<4xf32>
vector.print %v1 : vector<4xf32>
bufferization.dealloc_tensor %A_1 : tensor<?x?xf32, #COO>
bufferization.dealloc_tensor %B_1 : tensor<?x?xf32, #BDIA>
sparlay.release %S_1: !sparlay.struct< tensor<?x?xf32,#COO>, tensor<?x?xf32,#BDIA> >

return
}
}
Two binary files changed (contents not shown).
@@ -2,18 +2,11 @@ module {
func.func private @delUniSparseTensorF32(!llvm.ptr<i8>)
func.func private @sptCheckF32(!llvm.ptr<i8>, !llvm.ptr<i8>) attributes {llvm.emit_c_interface}
func.func private @sptMoveF32(!llvm.ptr<i8>, i32, i32) -> !llvm.ptr<i8> attributes {llvm.emit_c_interface}
func.func private @sptSwapF32(!llvm.ptr<i8>, i32, i32) -> !llvm.ptr<i8> attributes {llvm.emit_c_interface}
func.func private @sptTileSplitF32(!llvm.ptr<i8>, i32, i32) -> !llvm.ptr<i8> attributes {llvm.emit_c_interface}
func.func private @sptSeparateF32(!llvm.ptr<i8>, i32) -> !llvm.ptr<i8> attributes {llvm.emit_c_interface}
func.func private @sptTrimF32(!llvm.ptr<i8>, i32) -> !llvm.ptr<i8> attributes {llvm.emit_c_interface}
func.func private @sptGrowF32(!llvm.ptr<i8>, i32) -> !llvm.ptr<i8> attributes {llvm.emit_c_interface}
func.func private @sptFuseF32(!llvm.ptr<i8>, i32) -> !llvm.ptr<i8> attributes {llvm.emit_c_interface}
func.func private @sptSumF32(!llvm.ptr<i8>, i32) -> !llvm.ptr<i8> attributes {llvm.emit_c_interface}
func.func private @sptEnumerateF32(!llvm.ptr<i8>, i32, i32) -> !llvm.ptr<i8> attributes {llvm.emit_c_interface}
func.func private @sptScheduleF32(!llvm.ptr<i8>, i32, i32, i32) -> !llvm.ptr<i8> attributes {llvm.emit_c_interface}
func.func private @sptPadF32(!llvm.ptr<i8>, i32, i32) -> !llvm.ptr<i8> attributes {llvm.emit_c_interface}
func.func private @sptReorderF32(!llvm.ptr<i8>, i32, i32) -> !llvm.ptr<i8> attributes {llvm.emit_c_interface}
func.func private @sptCustTrimF32(!llvm.ptr<i8>, i32) -> !llvm.ptr<i8> attributes {llvm.emit_c_interface}
func.func private @sptCopyF32(!llvm.ptr<i8>) -> !llvm.ptr<i8> attributes {llvm.emit_c_interface}
func.func private @sptFromFileF32(!llvm.ptr<i8>) -> !llvm.ptr<i8> attributes {llvm.emit_c_interface}
func.func private @rtclock() -> f64
Binary file added evaluation/FormatConversion/coo_cisr.o
5 changes: 5 additions & 0 deletions evaluation/FormatConversion/run.sh
@@ -19,6 +19,8 @@ mlir-opt ./UniSparse/unisparse_csb_dia_v.mlir -one-shot-bufferize="bufferize-fun
clang++ csb_dia_v.o -L$SPLHOME/build/lib -lmlir_unisparse_runner_utils -L$LLVM_ROOT/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o csb_dia_v
mlir-opt ./UniSparse/unisparse_coo_c2sr.mlir -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" -finalizing-bufferize -convert-linalg-to-loops -convert-vector-to-scf -convert-scf-to-cf -lower-affine -convert-vector-to-llvm -convert-memref-to-llvm -convert-complex-to-standard -convert-math-to-llvm -convert-math-to-libm -convert-complex-to-libm -convert-complex-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -relocation-model=pic -filetype=obj -o coo_c2sr.o
clang++ coo_c2sr.o -L$SPLHOME/build/lib -lmlir_unisparse_runner_utils -L$LLVM_ROOT/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o coo_c2sr
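# Compile the COO-to-CISR conversion test and link it against the UniSparse and MLIR runner utilities.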
mlir-opt ./UniSparse/unisparse_coo_cisr.mlir -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" -finalizing-bufferize -convert-linalg-to-loops -convert-vector-to-scf -convert-scf-to-cf -lower-affine -convert-vector-to-llvm -convert-memref-to-llvm -convert-complex-to-standard -convert-math-to-llvm -convert-math-to-libm -convert-complex-to-libm -convert-complex-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -relocation-model=pic -filetype=obj -o coo_cisr.o
clang++ coo_cisr.o -L$SPLHOME/build/lib -lmlir_unisparse_runner_utils -L$LLVM_ROOT/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o coo_cisr

mlir-opt ./sparse_tensor_dialect/sparse_tensor_csr_to_csc.mlir -sparse-compiler | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -relocation-model=pic -filetype=obj -o sparse_tensor_csr_csc.o
clang++ sparse_tensor_csr_csc.o -L$LLVM_ROOT/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o sparse_tensor_csr_csc
@@ -56,4 +58,7 @@ do

echo COO_C2SR UniSparse
./coo_c2sr

echo COO_CISR UniSparse
./coo_cisr
done
