Skip to content

Commit

Permalink
[Datatype] support both F32 and F64
Browse files Browse the repository at this point in the history
  • Loading branch information
Sibylau committed Nov 7, 2023
1 parent c497389 commit 4f79c47
Show file tree
Hide file tree
Showing 31 changed files with 3,216 additions and 2,317 deletions.
1,511 changes: 1,511 additions & 0 deletions lib/Runtime/UniSparseTempLibs.cpp

Large diffs are not rendered by default.

2,775 changes: 573 additions & 2,202 deletions lib/Runtime/UniSparseUtils.cpp

Large diffs are not rendered by default.

168 changes: 103 additions & 65 deletions lib/Transforms/LowerFormatConversionPass.cpp

Large diffs are not rendered by default.

Binary file removed test/UniSparse/Integrate/CPU/csc_spmm
Binary file not shown.
Binary file removed test/UniSparse/Integrate/CPU/csc_spmv
Binary file not shown.
Binary file removed test/UniSparse/Integrate/CPU/csr_spmm
Binary file not shown.
Binary file removed test/UniSparse/Integrate/CPU/csr_spmv
Binary file not shown.
Binary file removed test/UniSparse/Integrate/CPU/dcsc_spmm
Binary file not shown.
Binary file removed test/UniSparse/Integrate/CPU/dcsc_spmv
Binary file not shown.
Binary file removed test/UniSparse/Integrate/CPU/dcsr_spmm
Binary file not shown.
Binary file removed test/UniSparse/Integrate/CPU/dcsr_spmv
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
// unisparse-opt ./unisparse_coo_spmm.mlir -unisparse-codegen -lower-format-conversion -lower-struct -dce | \
// unisparse-opt ./unisparse_coo_spmm_F32.mlir -unisparse-codegen -lower-format-conversion -lower-struct -dce | \
// mlir-opt -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" \
// -finalizing-bufferize -convert-linalg-to-loops -convert-vector-to-scf -convert-scf-to-cf -lower-affine \
// -convert-vector-to-llvm -convert-memref-to-llvm -convert-complex-to-standard -convert-math-to-llvm \
// -convert-math-to-libm -convert-complex-to-libm -convert-complex-to-llvm -convert-func-to-llvm \
// -reconcile-unrealized-casts | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -relocation-model=pic -filetype=obj -o coo_spmm.o
// -reconcile-unrealized-casts | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -relocation-model=pic -filetype=obj -o coo_spmm_F32.o

// clang++ coo_spmm.o -L$SPLHOME/build/lib -lmlir_unisparse_runner_utils \
// -L$LLVMHOME/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o coo_spmm
// clang++ coo_spmm_F32.o -L$SPLHOME/build/lib -lmlir_unisparse_runner_utils \
// -L$LLVMHOME/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o coo_spmm_F32

// ./coo_spmm
// ./coo_spmm_F32

!Filename = !llvm.ptr<i8>

Expand Down
86 changes: 86 additions & 0 deletions test/UniSparse/Integrate/CPU/unisparse_coo_spmm_F64.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// unisparse-opt ./unisparse_coo_spmm_F64.mlir -unisparse-codegen -lower-format-conversion -lower-struct -dce | \
// mlir-opt -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" \
// -finalizing-bufferize -convert-linalg-to-loops -convert-vector-to-scf -convert-scf-to-cf -lower-affine \
// -convert-vector-to-llvm -convert-memref-to-llvm -convert-complex-to-standard -convert-math-to-llvm \
// -convert-math-to-libm -convert-complex-to-libm -convert-complex-to-llvm -convert-func-to-llvm \
// -reconcile-unrealized-casts | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -relocation-model=pic -filetype=obj -o coo_spmm_F64.o

// clang++ coo_spmm_F64.o -L$SPLHOME/build/lib -lmlir_unisparse_runner_utils \
// -L$LLVMHOME/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o coo_spmm_F64

// ./coo_spmm_F64

// Opaque handle for the input-file path: an LLVM pointer to i8, produced by
// @getTensorFilename and consumed by unisparse.fromFile / @getTensorDim below.
!Filename = !llvm.ptr<i8>

// COO encoding used for the sparse operand A: the coordinate map is the
// identity (i,j)->(i,j) and the compress map is trim(0,1).
// NOTE(review): presumably trim(0,1) keeps only the stored (non-zero)
// coordinates on levels 0..1 -- confirm against the UniSparse dialect docs.
#COO = #unisparse.encoding<{
crdMap = #unisparse.crd<(i,j)->(i,j)>,
compressMap = #unisparse.compress<trim(0,1)>
}>

// linalg.generic trait describing a matrix-matrix product over (i,j,k):
// A is indexed (i,k), B is indexed (k,j), the output X is indexed (i,j);
// i and j are parallel dimensions and k is the reduction dimension.
// No op in this file's @main references #trait1 (the kernel is invoked through
// unisparse.coo_spmm instead).
// NOTE(review): the doc string spells the accumulation as `=+`; presumably
// `+=` is meant.
#trait1 = {
indexing_maps = [
affine_map<(i,j,k) -> (i, k)>, // A
affine_map<(i,j,k) -> (k, j)>, // B
affine_map<(i,j,k) -> (i, j)> // X (out)
],
iterator_types = ["parallel", "parallel", "reduction"],
doc = "X(i,j) =+ A(i,k) * B(k, j)"
}

module {
// External (body-less) declarations resolved at link time; the build recipe at
// the top of this file links mlir_unisparse_runner_utils, mlir_runner_utils and
// mlir_c_runner_utils, which presumably provide them.
// @rtclock: returns a timestamp as f64; @main subtracts two readings to time the kernel.
func.func private @rtclock() -> f64
// @getTensorFilename: maps an index to the !Filename of an input tensor.
func.func private @getTensorFilename(index) -> (!Filename)
// @getTensorDim: returns the extent of the given dimension for the tensor stored in the file.
func.func private @getTensorDim(!Filename, index) -> (index)

//CHECK-LABEL: func.func @main
// Test driver for F64 COO SpMM: loads sparse A from file, builds a dense
// operand B and a zeroed output X, times unisparse.coo_spmm, and prints the
// elapsed time followed by the leading 4x4 tiles of B and of the result.
func.func @main() {
// Zero constant: used as the fill value for X and as the padding value of the
// vector.transfer_read ops below.
%i0 = arith.constant 0.0 : f64
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// Number of columns of B and X. Despite the name, %c4 holds 1000.
%c4 = arith.constant 1000 : index
%fileName = call @getTensorFilename(%c0) : (index) -> (!Filename)

// Load A in COO form; its two extents are queried from the file through the
// runtime helper (the tensor.dim variant is left commented out below).
%A_0 = unisparse.fromFile (%fileName) : !Filename to tensor<?x?xf64, #COO>
%dim0 = call @getTensorDim(%fileName, %c0) : (!Filename, index) -> (index)
%dim1 = call @getTensorDim(%fileName, %c1) : (!Filename, index) -> (index)
// %dim0 = tensor.dim %A_0, %c0 : tensor<?x?xf64, #COO>
// %dim1 = tensor.dim %A_0, %c1 : tensor<?x?xf64, #COO>

// Initialize the dense operand B (dim1 x 1000) with B[i, j] = i * 1000 + j.
// The stores mutate %init_256_4 in place; the memref threaded through
// iter_args is the same buffer on every iteration.
%init_256_4 = memref.alloc(%dim1, %c4) : memref<?x?xf64>
%b = scf.for %i = %c0 to %dim1 step %c1 iter_args(%t = %init_256_4) -> memref<?x?xf64> {
%b2 = scf.for %j = %c0 to %c4 step %c1 iter_args(%t2 = %t) -> memref<?x?xf64> {
%k0 = arith.muli %i, %c4 : index
%k1 = arith.addi %j, %k0 : index
// NOTE(review): the linear index goes through i32 before the float
// conversion, so it would be truncated once i * 1000 + j exceeds the i32
// range (very large dim1) -- confirm this is acceptable for the inputs used.
%k2 = arith.index_cast %k1 : index to i32
%k = arith.sitofp %k2 : i32 to f64
memref.store %k, %t2[%i, %j] : memref<?x?xf64>
scf.yield %t2 : memref<?x?xf64>
}
scf.yield %b2 : memref<?x?xf64>
}

// Allocate the output X (dim0 x 1000) and fill it with 0.0.
%o0_4_4 = memref.alloc(%dim0, %c4) : memref<?x?xf64>
%o0 = scf.for %i = %c0 to %dim0 step %c1 iter_args(%t = %o0_4_4) -> memref<?x?xf64> {
%x2 = scf.for %j = %c0 to %c4 step %c1 iter_args(%t2 = %t) -> memref<?x?xf64> {
memref.store %i0, %t2[%i, %j] : memref<?x?xf64>
scf.yield %t2 : memref<?x?xf64>
}
scf.yield %x2 : memref<?x?xf64>
}

// Timed region: only the SpMM op sits between the two clock readings.
%t_start4 = call @rtclock() : () -> f64
%0 = unisparse.coo_spmm %A_0, %init_256_4, %o0_4_4: tensor<?x?xf64, #COO>, memref<?x?xf64>, memref<?x?xf64> to memref<?x?xf64>
%t_end4 = call @rtclock() : () -> f64
%t_4 = arith.subf %t_end4, %t_start4: f64
// Print the elapsed time (presumably seconds -- confirm against @rtclock).
vector.print %t_4 : f64
// Print the 4x4 tile at [0, 0] of B, then of the result; %i0 (0.0) is the
// padding value supplied to the reads.
%v1 = vector.transfer_read %init_256_4[%c0, %c0], %i0: memref<?x?xf64>, vector<4x4xf64>
vector.print %v1 : vector<4x4xf64>
%v0 = vector.transfer_read %0[%c0, %c0], %i0: memref<?x?xf64>, vector<4x4xf64>
vector.print %v0 : vector<4x4xf64>

// Release the sparse tensor. The two memref.alloc buffers above are not
// deallocated anywhere in this function.
bufferization.dealloc_tensor %A_0 : tensor<?x?xf64, #COO>
return
}
}
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
// unisparse-opt ./unisparse_coo_spmv.mlir -unisparse-codegen -lower-format-conversion -lower-struct -dce | \
// unisparse-opt ./unisparse_coo_spmv_F32.mlir -unisparse-codegen -lower-format-conversion -lower-struct -dce | \
// mlir-opt -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" \
// -finalizing-bufferize -convert-linalg-to-loops -convert-vector-to-scf -convert-scf-to-cf -lower-affine \
// -convert-vector-to-llvm -convert-memref-to-llvm -convert-complex-to-standard -convert-math-to-llvm \
// -convert-math-to-libm -convert-complex-to-libm -convert-complex-to-llvm -convert-func-to-llvm \
// -reconcile-unrealized-casts | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -relocation-model=pic -filetype=obj -o coo_spmv.o
// -reconcile-unrealized-casts | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -relocation-model=pic -filetype=obj -o coo_spmv_F32.o

// clang++ coo_spmv.o -L$SPLHOME/build/lib -lmlir_unisparse_runner_utils \
// -L$LLVMHOME/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o coo_spmv
// clang++ coo_spmv_F32.o -L$SPLHOME/build/lib -lmlir_unisparse_runner_utils \
// -L$LLVMHOME/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o coo_spmv_F32

// ./coo_spmv
// ./coo_spmv_F32

!Filename = !llvm.ptr<i8>

Expand Down
84 changes: 84 additions & 0 deletions test/UniSparse/Integrate/CPU/unisparse_coo_spmv_F64.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// unisparse-opt ./unisparse_coo_spmv_F64.mlir -unisparse-codegen -lower-format-conversion -lower-struct -dce | \
// mlir-opt -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" \
// -finalizing-bufferize -convert-linalg-to-loops -convert-vector-to-scf -convert-scf-to-cf -lower-affine \
// -convert-vector-to-llvm -convert-memref-to-llvm -convert-complex-to-standard -convert-math-to-llvm \
// -convert-math-to-libm -convert-complex-to-libm -convert-complex-to-llvm -convert-func-to-llvm \
// -reconcile-unrealized-casts | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -relocation-model=pic -filetype=obj -o coo_spmv_F64.o

// clang++ coo_spmv_F64.o -L$SPLHOME/build/lib -lmlir_unisparse_runner_utils \
// -L$LLVMHOME/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o coo_spmv_F64

// ./coo_spmv_F64

// Opaque handle for the input-file path: an LLVM pointer to i8, produced by
// @getTensorFilename and consumed by unisparse.fromFile / @getTensorDim below.
!Filename = !llvm.ptr<i8>

// COO encoding used for the sparse operand A: the coordinate map is the
// identity (i,j)->(i,j) and the compress map is trim(0,1).
// NOTE(review): presumably trim(0,1) keeps only the stored (non-zero)
// coordinates on levels 0..1 -- confirm against the UniSparse dialect docs.
#COO = #unisparse.encoding<{
crdMap = #unisparse.crd<(i,j)->(i,j)>,
compressMap = #unisparse.compress<trim(0,1)>
}>

// linalg.generic trait describing a matrix-vector product over (i,j):
// A is indexed (i,j), B is indexed (j), the output X is indexed (i);
// i is a parallel dimension and j is the reduction dimension.
// No op in this file's @main references #trait1 (the kernel is invoked through
// unisparse.coo_spmv instead).
// NOTE(review): the doc string spells the accumulation as `=+`; presumably
// `+=` is meant.
#trait1 = {
indexing_maps = [
affine_map<(i,j) -> (i, j)>, // A
affine_map<(i,j) -> (j)>, // B
affine_map<(i,j) -> (i)> // X (out)
],
iterator_types = ["parallel", "reduction"],
doc = "X(i) =+ A(i,j) * B(j)"
}

module {
// External (body-less) declarations resolved at link time; the build recipe at
// the top of this file links mlir_unisparse_runner_utils, mlir_runner_utils and
// mlir_c_runner_utils, which presumably provide them.
// @rtclock: returns a timestamp as f64; @main subtracts two readings to time the kernel.
func.func private @rtclock() -> f64
// @getTensorFilename: maps an index to the !Filename of an input tensor.
func.func private @getTensorFilename(index) -> (!Filename)
// @getTensorDim: returns the extent of the given dimension for the tensor stored in the file.
func.func private @getTensorDim(!Filename, index) -> (index)
// @printU64: takes an index and returns nothing; @main calls it on both
// extents of A (presumably it prints the value -- confirm against the runtime).
func.func private @printU64(index) -> ()

//CHECK-LABEL: func.func @main
// Test driver for F64 COO SpMV: loads sparse A from file, builds a dense
// vector B and a zeroed output X, times unisparse.coo_spmv, and prints the
// elapsed time followed by the first four elements of B and of the result.
func.func @main() {
// Zero constant: used as the fill value for X and as the padding value of the
// vector.transfer_read ops below.
%i0 = arith.constant 0.0 : f64
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index

%fileName = call @getTensorFilename(%c0) : (index) -> (!Filename)

// Load A in COO form; its two extents are queried from the file through the
// runtime helper (the tensor.dim variant is left commented out below) and
// passed to @printU64.
%A_0 = unisparse.fromFile (%fileName) : !Filename to tensor<?x?xf64, #COO>
%dim0 = call @getTensorDim(%fileName, %c0) : (!Filename, index) -> (index)
%dim1 = call @getTensorDim(%fileName, %c1) : (!Filename, index) -> (index)
call @printU64(%dim0) : (index) -> ()
call @printU64(%dim1) : (index) -> ()
// %dim0 = tensor.dim %A_0, %c0 : tensor<?x?xf64, #COO>
// %dim1 = tensor.dim %A_0, %c1 : tensor<?x?xf64, #COO>

// Initialize the dense input vector B (length dim1) with B[i] = i.
// The stores mutate %init_256_4 in place; the memref threaded through
// iter_args is the same buffer on every iteration.
%init_256_4 = memref.alloc(%dim1) : memref<?xf64>
%b = scf.for %i = %c0 to %dim1 step %c1 iter_args(%t = %init_256_4) -> memref<?xf64> {
// Multiplying by %c1 (1) leaves the index unchanged.
%k0 = arith.muli %i, %c1 : index
%k1 = arith.index_cast %k0 : index to i32
%k = arith.sitofp %k1 : i32 to f64
memref.store %k, %t[%i] : memref<?xf64>
scf.yield %t : memref<?xf64>
}

// Allocate the output vector X (length dim0) and fill it with 0.0.
%o0_4_4 = memref.alloc(%dim0) : memref<?xf64>
%o0 = scf.for %i = %c0 to %dim0 step %c1 iter_args(%t = %o0_4_4) -> memref<?xf64> {
memref.store %i0, %t[%i] : memref<?xf64>
scf.yield %t : memref<?xf64>
}

// Timed region: only the SpMV op sits between the two clock readings.
%t_start4 = call @rtclock() : () -> f64
%0 = unisparse.coo_spmv %A_0, %init_256_4, %o0_4_4: tensor<?x?xf64, #COO>, memref<?xf64>, memref<?xf64> to memref<?xf64>
%t_end4 = call @rtclock() : () -> f64
%t_4 = arith.subf %t_end4, %t_start4: f64
// Print the elapsed time (presumably seconds -- confirm against @rtclock).
vector.print %t_4 : f64
// Print the first four elements of B, then of the result; %i0 (0.0) is the
// padding value supplied to the reads.
%v1 = vector.transfer_read %init_256_4[%c0], %i0: memref<?xf64>, vector<4xf64>
vector.print %v1 : vector<4xf64>
%v0 = vector.transfer_read %0[%c0], %i0: memref<?xf64>, vector<4xf64>
vector.print %v0 : vector<4xf64>

// Release the sparse tensor. The two memref.alloc buffers above are not
// deallocated anywhere in this function (the dealloc_tensor lines for them
// are commented out).
bufferization.dealloc_tensor %A_0 : tensor<?x?xf64, #COO>
// bufferization.dealloc_tensor %init_256_4 : tensor<?xf64>
// bufferization.dealloc_tensor %o0_4_4 : tensor<?xf64>
return
}
}
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
// unisparse-opt ./unisparse_csc_spmm.mlir -unisparse-codegen -lower-format-conversion -lower-struct -dce | \
// unisparse-opt ./unisparse_csc_spmm_F32.mlir -unisparse-codegen -lower-format-conversion -lower-struct -dce | \
// mlir-opt -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" \
// -finalizing-bufferize -convert-linalg-to-loops -convert-vector-to-scf -convert-scf-to-cf -lower-affine \
// -convert-vector-to-llvm -convert-memref-to-llvm -convert-complex-to-standard -convert-math-to-llvm \
// -convert-math-to-libm -convert-complex-to-libm -convert-complex-to-llvm -convert-func-to-llvm \
// -reconcile-unrealized-casts | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -relocation-model=pic -filetype=obj -o spmm.o
// -reconcile-unrealized-casts | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -relocation-model=pic -filetype=obj -o csc_spmm_F32.o

// clang++ spmm.o -L$SPLHOME/build/lib -lmlir_unisparse_runner_utils \
// -L$LLVMHOME/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o spmm
// clang++ csc_spmm_F32.o -L$SPLHOME/build/lib -lmlir_unisparse_runner_utils \
// -L$LLVMHOME/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o csc_spmm_F32

// ./spmm
// ./csc_spmm_F32

!Filename = !llvm.ptr<i8>

Expand Down
105 changes: 105 additions & 0 deletions test/UniSparse/Integrate/CPU/unisparse_csc_spmm_F64.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
// unisparse-opt ./unisparse_csc_spmm_F64.mlir -unisparse-codegen -lower-format-conversion -lower-struct -dce | \
// mlir-opt -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" \
// -finalizing-bufferize -convert-linalg-to-loops -convert-vector-to-scf -convert-scf-to-cf -lower-affine \
// -convert-vector-to-llvm -convert-memref-to-llvm -convert-complex-to-standard -convert-math-to-llvm \
// -convert-math-to-libm -convert-complex-to-libm -convert-complex-to-llvm -convert-func-to-llvm \
// -reconcile-unrealized-casts | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -relocation-model=pic -filetype=obj -o csc_spmm_F64.o

// clang++ csc_spmm_F64.o -L$SPLHOME/build/lib -lmlir_unisparse_runner_utils \
// -L$LLVMHOME/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o csc_spmm_F64

// ./csc_spmm_F64

// Opaque handle for the input-file path: an LLVM pointer to i8, produced by
// @getTensorFilename and consumed by unisparse.fromFile below.
!Filename = !llvm.ptr<i8>

// COO encoding used when A is first read from file: identity coordinate map
// (i,j)->(i,j) with compress map trim(0,1).
// NOTE(review): presumably trim(0,1) keeps only the stored (non-zero)
// coordinates on levels 0..1 -- confirm against the UniSparse dialect docs.
#COO = #unisparse.encoding<{
crdMap = #unisparse.crd<(i,j)->(i,j)>,
compressMap = #unisparse.compress<trim(0,1)>
}>

// CSC encoding that A is converted to before the kernel runs: the coordinate
// map swaps the indices, (i,j)->(j,i), so the column index is the outer level;
// the compress map is fuse(0), trim(1,1).
// NOTE(review): presumably fuse(0) merges equal level-0 (column) coordinates
// into a compressed pointer level -- confirm against the UniSparse dialect docs.
#CSC = #unisparse.encoding<{
crdMap = #unisparse.crd<(i,j)->(j, i)>,
compressMap = #unisparse.compress<fuse(0), trim(1,1)>
}>

// linalg.generic trait used by @kernel_csc_spmm: a matrix-matrix product over
// (i,j,k) where A is indexed (i,k), B is indexed (k,j) and the output X is
// indexed (i,j); i and j are parallel dimensions and k is the reduction.
// NOTE(review): the doc string spells the accumulation as `=+`; presumably
// `+=` is meant.
#trait1 = {
indexing_maps = [
affine_map<(i,j,k) -> (i, k)>, // A
affine_map<(i,j,k) -> (k, j)>, // B
affine_map<(i,j,k) -> (i, j)> // X (out)
],
iterator_types = ["parallel", "parallel", "reduction"],
doc = "X(i,j) =+ A(i,k) * B(k, j)"
}

module {
// External (body-less) declarations resolved at link time; the build recipe at
// the top of this file links mlir_unisparse_runner_utils, mlir_runner_utils and
// mlir_c_runner_utils, which presumably provide them.
// @rtclock: returns a timestamp as f64; @main subtracts pairs of readings to time phases.
func.func private @rtclock() -> f64
// @getTensorFilename: maps an index to the !Filename of an input tensor.
func.func private @getTensorFilename(index) -> (!Filename)

// SpMM kernel over a CSC-encoded sparse operand.
//   %arg0: sparse matrix A (f64, #CSC encoding)
//   %arg1: dense matrix B (f64)
//   %argx: dense accumulator X that the products are added into
// Returns the tensor produced by the linalg.generic, i.e. X with every
// A(i,k) * B(k,j) product accumulated as described by #trait1.
func.func @kernel_csc_spmm(%arg0: tensor<?x?xf64, #CSC>, %arg1: tensor<?x?xf64>, %argx: tensor<?x?xf64>) -> tensor<?x?xf64> {
  %result = linalg.generic #trait1
      ins(%arg0, %arg1 : tensor<?x?xf64, #CSC>, tensor<?x?xf64>)
      outs(%argx : tensor<?x?xf64>) {
    ^bb0(%lhs: f64, %rhs: f64, %acc: f64):
      // Scalar multiply-accumulate: acc + lhs * rhs.
      %product = arith.mulf %lhs, %rhs : f64
      %updated = arith.addf %acc, %product : f64
      linalg.yield %updated : f64
  } -> tensor<?x?xf64>
  return %result : tensor<?x?xf64>
}

//CHECK-LABEL: func.func @main
// Test driver for F64 CSC SpMM: loads sparse A from file as COO, converts it to
// CSC (timed), builds a dense operand B and a zeroed output X, runs
// @kernel_csc_spmm (timed), and prints both elapsed times followed by the
// leading 4x4 tile of the result.
func.func @main() {
  // Zero constant: used as the fill value for X and as the padding value of the
  // vector.transfer_read below.
  %i0 = arith.constant 0.0 : f64
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  // Number of columns of B and X. Despite the name, %c4 holds 1000.
  %c4 = arith.constant 1000 : index

  %fileName = call @getTensorFilename(%c0) : (index) -> (!Filename)

  // Timed region 1: read A as COO, query its extents, convert it to CSC.
  %t_start1 = call @rtclock() : () -> f64
  %A_1 = unisparse.fromFile (%fileName) : !Filename to tensor<?x?xf64, #COO>
  // Both extents are read from the COO tensor before the conversion:
  //   %rows  = dim 0 of A -> number of rows of the output X(i,j)
  //   %c256  = dim 1 of A -> number of rows of the dense operand B(k,j)
  // X must be sized from dim 0; sizing it from dim 1 is only correct when A
  // is square and leaves X too small whenever A has more rows than columns.
  %rows = tensor.dim %A_1, %c0 : tensor<?x?xf64, #COO>
  %c256 = tensor.dim %A_1, %c1 : tensor<?x?xf64, #COO>
  %a1 = unisparse.convert (%A_1): tensor<?x?xf64, #COO> to tensor<?x?xf64, #CSC>
  %t_end1 = call @rtclock() : () -> f64
  %t_1 = arith.subf %t_end1, %t_start1: f64
  // Print the load + conversion time (presumably seconds -- confirm against @rtclock).
  vector.print %t_1 : f64

  // Initialize the dense operand B (cols(A) x 1000) with B[i, j] = i * 1000 + j.
  %init_256_4 = bufferization.alloc_tensor(%c256, %c4) : tensor<?x?xf64>
  %b = scf.for %i = %c0 to %c256 step %c1 iter_args(%t = %init_256_4) -> tensor<?x?xf64> {
    %b2 = scf.for %j = %c0 to %c4 step %c1 iter_args(%t2 = %t) -> tensor<?x?xf64> {
      %k0 = arith.muli %i, %c4 : index
      %k1 = arith.addi %j, %k0 : index
      // NOTE(review): the linear index goes through i32 before the float
      // conversion, so it would be truncated once i * 1000 + j exceeds the
      // i32 range (very large matrices) -- confirm this is acceptable.
      %k2 = arith.index_cast %k1 : index to i32
      %k = arith.sitofp %k2 : i32 to f64
      %t3 = tensor.insert %k into %t2[%i, %j] : tensor<?x?xf64>
      scf.yield %t3 : tensor<?x?xf64>
    }
    scf.yield %b2 : tensor<?x?xf64>
  }

  // Allocate the output X (rows(A) x 1000) and fill it with 0.0.
  %o1_4_4 = bufferization.alloc_tensor(%rows, %c4) : tensor<?x?xf64>
  %o1 = scf.for %i = %c0 to %rows step %c1 iter_args(%t = %o1_4_4) -> tensor<?x?xf64> {
    %x2 = scf.for %j = %c0 to %c4 step %c1 iter_args(%t2 = %t) -> tensor<?x?xf64> {
      %t3 = tensor.insert %i0 into %t2[%i, %j] : tensor<?x?xf64>
      scf.yield %t3 : tensor<?x?xf64>
    }
    scf.yield %x2 : tensor<?x?xf64>
  }

  // Timed region 2: only the kernel call sits between the two clock readings.
  %t_start5 = call @rtclock() : () -> f64
  %1 = call @kernel_csc_spmm(%a1, %b, %o1) : (tensor<?x?xf64, #CSC>, tensor<?x?xf64>, tensor<?x?xf64>) -> tensor<?x?xf64>
  %t_end5 = call @rtclock() : () -> f64
  %t_5 = arith.subf %t_end5, %t_start5: f64
  vector.print %t_5 : f64
  // Print the 4x4 tile at [0, 0] of the result; %i0 (0.0) is the padding value
  // supplied to the read.
  %v1 = vector.transfer_read %1[%c0, %c0], %i0: tensor<?x?xf64>, vector<4x4xf64>
  vector.print %v1 : vector<4x4xf64>

  // Release the sparse COO tensor; the dense-tensor deallocations remain
  // commented out as in the sibling tests.
  bufferization.dealloc_tensor %A_1 : tensor<?x?xf64, #COO>
  // bufferization.dealloc_tensor %init_256_4 : tensor<?x?xf64>
  // bufferization.dealloc_tensor %o1_4_4 : tensor<?x?xf64>
  return
}
}
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
// unisparse-opt ./unisparse_csc_spmv.mlir -unisparse-codegen -lower-format-conversion -lower-struct -dce | \
// unisparse-opt ./unisparse_csc_spmv_F32.mlir -unisparse-codegen -lower-format-conversion -lower-struct -dce | \
// mlir-opt -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" \
// -finalizing-bufferize -convert-linalg-to-loops -convert-vector-to-scf -convert-scf-to-cf -lower-affine \
// -convert-vector-to-llvm -convert-memref-to-llvm -convert-complex-to-standard -convert-math-to-llvm \
// -convert-math-to-libm -convert-complex-to-libm -convert-complex-to-llvm -convert-func-to-llvm \
// -reconcile-unrealized-casts | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -relocation-model=pic -filetype=obj -o spmv.o
// -reconcile-unrealized-casts | mlir-translate -mlir-to-llvmir | opt -O3 -S | llc -O3 -relocation-model=pic -filetype=obj -o csc_spmv_F32.o

// clang++ spmv.o -L$SPLHOME/build/lib -lmlir_unisparse_runner_utils \
// -L$LLVMHOME/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o spmv
// clang++ csc_spmv_F32.o -L$SPLHOME/build/lib -lmlir_unisparse_runner_utils \
// -L$LLVMHOME/build/lib -lmlir_runner_utils -lmlir_c_runner_utils -o csc_spmv_F32

// ./spmv
// ./csc_spmv_F32

!Filename = !llvm.ptr<i8>

Expand Down
Loading

0 comments on commit 4f79c47

Please sign in to comment.