From ae663e76e7b88936e2a37ad2f98fd497f89a1445 Mon Sep 17 00:00:00 2001 From: artemiipatov Date: Tue, 28 Nov 2023 20:58:37 +0300 Subject: [PATCH 1/7] perf: kronecker --- src/GraphBLAS-sharp.Backend/Common/ClArray.fs | 49 +++++- src/GraphBLAS-sharp.Backend/Common/Map.fs | 30 ++++ .../Operations/Kronecker.fs | 148 +++++++++++------- 3 files changed, 165 insertions(+), 62 deletions(-) diff --git a/src/GraphBLAS-sharp.Backend/Common/ClArray.fs b/src/GraphBLAS-sharp.Backend/Common/ClArray.fs index dacb249e..f91b8b5a 100644 --- a/src/GraphBLAS-sharp.Backend/Common/ClArray.fs +++ b/src/GraphBLAS-sharp.Backend/Common/ClArray.fs @@ -699,11 +699,11 @@ module ClArray = bound<'a, int> Search.Bin.lowerBound clContext /// - /// Gets the value at the specified position from the input array. + /// Gets the value at the specified position from the input array and inserts it into the given ClCell. /// /// OpenCL context. /// Should be a power of 2 and greater than 1. - let item<'a> (clContext: ClContext) workGroupSize = + let itemTo<'a> (clContext: ClContext) workGroupSize = let kernel = <@ fun (ndRange: Range1D) index (array: ClArray<'a>) (result: ClCell<'a>) -> @@ -715,21 +715,37 @@ module ClArray = let program = clContext.Compile kernel - fun (processor: MailboxProcessor<_>) (index: int) (array: ClArray<'a>) -> + fun (processor: MailboxProcessor<_>) (index: int) (array: ClArray<'a>) (output: ClCell<'a>) -> if index < 0 || index >= array.Length then failwith "Index out of range" - let result = - clContext.CreateClCell Unchecked.defaultof<'a> - let kernel = program.GetKernel() let ndRange = Range1D.CreateValid(1, workGroupSize) - processor.Post(Msg.MsgSetArguments(fun () -> kernel.KernelFunc ndRange index array result)) + processor.Post(Msg.MsgSetArguments(fun () -> kernel.KernelFunc ndRange index array output)) processor.Post(Msg.CreateRunMsg<_, _> kernel) + /// + /// Gets the value at the specified position from the input array. + /// + /// OpenCL context. 
+ /// Should be a power of 2 and greater than 1. + let item<'a> (clContext: ClContext) workGroupSize = + + let itemTo = itemTo clContext workGroupSize + + fun (processor: MailboxProcessor<_>) (index: int) (array: ClArray<'a>) -> + + if index < 0 || index >= array.Length then + failwith "Index out of range" + + let result = + clContext.CreateClCell Unchecked.defaultof<'a> + + itemTo processor index array result + result /// @@ -781,3 +797,22 @@ module ClArray = bitmap.Free processor result + + /// + /// Builds a new array whose elements are the results of applying the given function + /// to each of the elements of the array. + /// + /// The function to transform elements of the array. + /// OpenCL context. + /// Should be a power of 2 and greater than 1. + let map<'a, 'b> (op: Expr<'a -> 'b>) (clContext: ClContext) workGroupSize = Map.map op clContext workGroupSize + + /// + /// Changes elements of the input array in place, applying the given function + /// to each element of the array. + /// + /// The function to transform elements of the array. + /// OpenCL context. + /// Should be a power of 2 and greater than 1. + let mapInPlace<'a> (op: Expr<'a -> 'a>) (clContext: ClContext) workGroupSize = + Map.mapInPlace op clContext workGroupSize diff --git a/src/GraphBLAS-sharp.Backend/Common/Map.fs b/src/GraphBLAS-sharp.Backend/Common/Map.fs index f22f0f6b..309055a0 100644 --- a/src/GraphBLAS-sharp.Backend/Common/Map.fs +++ b/src/GraphBLAS-sharp.Backend/Common/Map.fs @@ -40,6 +40,36 @@ module Map = result + /// + /// Changes elements of the input array, applying the given function + /// to each element of the array. + /// + /// The function to transform elements of the array. + /// OpenCL context. + /// Should be a power of 2 and greater than 1. 
+ let mapInPlace<'a> (op: Expr<'a -> 'a>) (clContext: ClContext) workGroupSize = + + let map = + <@ fun (ndRange: Range1D) length (inputArray: ClArray<'a>) -> + + let gid = ndRange.GlobalID0 + + if gid < length then + inputArray.[gid] <- (%op) inputArray.[gid] @> + + let kernel = clContext.Compile map + + fun (processor: MailboxProcessor<_>) (inputArray: ClArray<'a>) -> + + let ndRange = + Range1D.CreateValid(inputArray.Length, workGroupSize) + + let kernel = kernel.GetKernel() + + processor.Post(Msg.MsgSetArguments(fun () -> kernel.KernelFunc ndRange inputArray.Length inputArray)) + + processor.Post(Msg.CreateRunMsg<_, _>(kernel)) + /// /// Builds a new array whose elements are the results of applying the given function /// to the corresponding pairs of values, where the first element of pair is from the given array diff --git a/src/GraphBLAS-sharp.Backend/Operations/Kronecker.fs b/src/GraphBLAS-sharp.Backend/Operations/Kronecker.fs index 9ff810bf..fe3d6159 100644 --- a/src/GraphBLAS-sharp.Backend/Operations/Kronecker.fs +++ b/src/GraphBLAS-sharp.Backend/Operations/Kronecker.fs @@ -171,7 +171,7 @@ module internal Kronecker = let private setPositions<'c when 'c: struct> (clContext: ClContext) workGroupSize = let setPositions = - <@ fun (ndRange: Range1D) rowCount columnCount startIndex (rowOffset: ClCell) (columnOffset: ClCell) (bitmap: ClArray) (values: ClArray<'c>) (resultRows: ClArray) (resultColumns: ClArray) (resultValues: ClArray<'c>) -> + <@ fun (ndRange: Range1D) rowCount columnCount startIndex (rowOffset: int) (columnOffset: int) (bitmap: ClArray) (values: ClArray<'c>) (resultRows: ClArray) (resultColumns: ClArray) (resultValues: ClArray<'c>) -> let gid = ndRange.GlobalID0 @@ -184,8 +184,8 @@ module internal Kronecker = let index = startIndex + bitmap.[gid] - 1 - resultRows.[index] <- rowIndex + rowOffset.Value - resultColumns.[index] <- columnIndex + columnOffset.Value + resultRows.[index] <- rowIndex + rowOffset + resultColumns.[index] <- columnIndex + 
columnOffset resultValues.[index] <- values.[gid] @> let kernel = clContext.Compile <| setPositions @@ -202,9 +202,6 @@ module internal Kronecker = let kernel = kernel.GetKernel() - let rowOffset = rowOffset |> clContext.CreateClCell - let columnOffset = columnOffset |> clContext.CreateClCell - processor.Post( Msg.MsgSetArguments (fun () -> @@ -224,23 +221,20 @@ module internal Kronecker = processor.Post(Msg.CreateRunMsg<_, _> kernel) - rowOffset.Free processor - columnOffset.Free processor - (sum.ToHostAndFree processor) + startIndex let private copyToResult (clContext: ClContext) workGroupSize = let copyToResult = - <@ fun (ndRange: Range1D) startIndex sourceLength (rowOffset: ClCell) (columnOffset: ClCell) (sourceRows: ClArray) (sourceColumns: ClArray) (sourceValues: ClArray<'c>) (resultRows: ClArray) (resultColumns: ClArray) (resultValues: ClArray<'c>) -> + <@ fun (ndRange: Range1D) startIndex sourceLength (rowOffset: int) (columnOffset: int) (sourceRows: ClArray) (sourceColumns: ClArray) (sourceValues: ClArray<'c>) (resultRows: ClArray) (resultColumns: ClArray) (resultValues: ClArray<'c>) -> let gid = ndRange.GlobalID0 if gid < sourceLength then let index = startIndex + gid - resultRows.[index] <- rowOffset.Value + sourceRows.[gid] - resultColumns.[index] <- columnOffset.Value + sourceColumns.[gid] + resultRows.[index] <- rowOffset + sourceRows.[gid] + resultColumns.[index] <- columnOffset + sourceColumns.[gid] resultValues.[index] <- sourceValues.[gid] @> let kernel = clContext.Compile <| copyToResult @@ -252,9 +246,6 @@ module internal Kronecker = let kernel = kernel.GetKernel() - let rowOffset = rowOffset |> clContext.CreateClCell - let columnOffset = columnOffset |> clContext.CreateClCell - processor.Post( Msg.MsgSetArguments (fun () -> @@ -274,14 +265,11 @@ module internal Kronecker = processor.Post(Msg.CreateRunMsg<_, _> kernel) - rowOffset.Free processor - columnOffset.Free processor - let private insertZero (clContext: ClContext) workGroupSize = let 
copy = copyToResult clContext workGroupSize - fun queue startIndex (zeroCounts: int list array) (matrixZero: COO<'c>) resultMatrix -> + fun queue startIndex (zeroCounts: ClArray array) (matrixZero: COO<'c>) resultMatrix -> let rowCount = zeroCounts.Length @@ -298,20 +286,19 @@ module internal Kronecker = startIndex <- startIndex + matrixZero.NNZ - let rec insertInRowRec zeroCounts row column = - match zeroCounts with - | [] -> () - | h :: tl -> - insertMany row column h + for row in 0 .. rowCount - 1 do + let zeroCountInRow = zeroCounts.[row].ToHostAndFree queue - insertInRowRec tl row (h + column + 1) + let mutable column = 0 - for row in 0 .. rowCount - 1 do - insertInRowRec zeroCounts.[row] row 0 + for count in zeroCountInRow do + insertMany row column count + + column <- column + count + 1 let private insertNonZero (clContext: ClContext) workGroupSize op = - let item = ClArray.item clContext workGroupSize + let itemTo = ClArray.itemTo clContext workGroupSize let preparePositions = preparePositions clContext workGroupSize op @@ -336,11 +323,14 @@ module internal Kronecker = let mutable startIndex = 0 + let value = clContext.CreateClCell Unchecked.defaultof<'a> + for row in 0 .. rowCount - 1 do let leftEdge, rightEdge = rowsEdges.[row] for i in leftEdge .. 
rightEdge do - let value = item queue i leftValues + itemTo queue i leftValues value + let column = leftColsHost.[i] let rowOffset = row * matrixRight.RowCount @@ -348,21 +338,79 @@ module internal Kronecker = preparePositions queue value matrixRight mappedMatrix bitmap - value.Free queue - startIndex <- setPositions rowOffset columnOffset startIndex resultMatrix mappedMatrix bitmap + value.Free queue bitmap.Free queue mappedMatrix.Free queue startIndex + let private countZeroElements + (clContext: ClContext) + workGroupSize + = + + let countZeroElementsInRow = + <@ fun (ndRange: Range1D) (row: int) (nnzInRow: int) (columnCount: int) (columns: ClArray) (rowsEdges: ClArray) (result: ClArray) -> + + let gid = ndRange.GlobalID0 + + if gid <= nnzInRow then + + let firstIndex = fst rowsEdges.[row] + let lastIndex = snd rowsEdges.[row] + + if nnzInRow = 0 then + result.[0] <- columnCount + + elif gid = nnzInRow then + result.[nnzInRow] <- columnCount - columns.[lastIndex] - 1 + + elif gid = 0 then + result.[0] <- columns.[firstIndex] + + else + result.[gid] <- columns.[firstIndex + gid] - columns.[firstIndex + gid - 1] - 1 @> + + let kernel = clContext.Compile countZeroElementsInRow + + fun (queue: MailboxProcessor<_>) (matrix: CSR<_>) (rowsEdges: ClArray) (nnzInRows: int array) -> + + let kernel = kernel.GetKernel() + + let (zeroCounts: ClArray array) = Array.zeroCreate matrix.RowCount + + for row in 0 .. 
matrix.RowCount - 1 do + + let length = nnzInRows.[row] + 1 + + let ndRange = + Range1D.CreateValid(length, workGroupSize) + + let result = clContext.CreateClArrayWithSpecificAllocationMode(DeviceOnly, length) + + queue.Post(Msg.MsgSetArguments(fun () -> kernel.KernelFunc ndRange row nnzInRows.[row] matrix.ColumnCount matrix.Columns rowsEdges result)) + queue.Post(Msg.CreateRunMsg<_, _>(kernel)) + + zeroCounts.[row] <- result + + zeroCounts + let private mapAll<'a, 'b, 'c when 'a: struct and 'b: struct and 'c: struct and 'c: equality> (clContext: ClContext) workGroupSize (op: Expr<'a option -> 'b option -> 'c option>) = + let pairwise = ClArray.pairwise clContext workGroupSize + + let mapSubtract = ClArray.map <@ fun (a, b) -> b - a @> clContext workGroupSize + + let mapInPlace = ClArray.mapInPlace <@ fun (a, b) -> (a, b - 1) @> clContext workGroupSize + + let countZeroElements = countZeroElements clContext workGroupSize + let insertNonZero = insertNonZero clContext workGroupSize op let insertZero = insertZero clContext workGroupSize @@ -386,32 +434,22 @@ module internal Kronecker = RowCount = matrixLeft.RowCount * matrixRight.RowCount ColumnCount = matrixLeft.ColumnCount * matrixRight.ColumnCount } - let leftRowPointers = matrixLeft.RowPointers.ToHost queue let leftColumns = matrixLeft.Columns.ToHost queue - let nnzInRows = - leftRowPointers - |> Array.pairwise - |> Array.map (fun (fst, snd) -> snd - fst) - - let rowsEdges = - leftRowPointers - |> Array.pairwise - |> Array.map (fun (fst, snd) -> (fst, snd - 1)) - - let (zeroCounts: int list array) = Array.zeroCreate matrixLeft.RowCount - - { 0 .. 
matrixLeft.RowCount - 1 } - |> Seq.iter2 - (fun edges i -> - zeroCounts.[i] <- - leftColumns.[fst edges..snd edges] - |> Array.toList - |> List.insertAt 0 -1 - |> List.insertAt (nnzInRows.[i] + 1) matrixLeft.ColumnCount - |> List.pairwise - |> List.map (fun (fstCol, sndCol) -> sndCol - fstCol - 1)) - rowsEdges + let pairsOfRowPointers = + pairwise queue DeviceOnly matrixLeft.RowPointers + |> Option.defaultWith + (fun () -> failwith "The state of the matrix is broken. The length of the rowPointers must be >= 2") + + let nnzInRows = (mapSubtract queue DeviceOnly pairsOfRowPointers).ToHostAndFree queue + + mapInPlace queue pairsOfRowPointers + + let rowsEdges = pairsOfRowPointers + + let zeroCounts = countZeroElements queue matrixLeft rowsEdges nnzInRows + + let rowsEdges = rowsEdges.ToHostAndFree queue // TODO: get rid of transfer to host let startIndex = insertNonZero queue rowsEdges matrixRight matrixLeft.Values leftColumns resultMatrix From 38d5846931c11731b892c085ff3600dbb6abfc53 Mon Sep 17 00:00:00 2001 From: artemiipatov Date: Tue, 28 Nov 2023 21:35:03 +0300 Subject: [PATCH 2/7] refactor: kronecker --- .../Operations/Kronecker.fs | 35 +++++++++---------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/src/GraphBLAS-sharp.Backend/Operations/Kronecker.fs b/src/GraphBLAS-sharp.Backend/Operations/Kronecker.fs index fe3d6159..de703962 100644 --- a/src/GraphBLAS-sharp.Backend/Operations/Kronecker.fs +++ b/src/GraphBLAS-sharp.Backend/Operations/Kronecker.fs @@ -305,12 +305,12 @@ module internal Kronecker = let setPositions = setPositions clContext workGroupSize - fun queue (rowsEdges: (int * int) array) (matrixRight: CSR<'b>) (leftValues: ClArray<'a>) (leftColsHost: int array) (resultMatrix: COO<'c>) -> + fun queue (rowBoundaries: (int * int) array) (matrixRight: CSR<'b>) (leftValues: ClArray<'a>) (leftColsHost: int array) (resultMatrix: COO<'c>) -> let setPositions = setPositions queue matrixRight.RowCount matrixRight.ColumnCount - let rowCount = 
rowsEdges.Length + let rowCount = rowBoundaries.Length let length = matrixRight.RowCount * matrixRight.ColumnCount @@ -326,7 +326,7 @@ module internal Kronecker = let value = clContext.CreateClCell Unchecked.defaultof<'a> for row in 0 .. rowCount - 1 do - let leftEdge, rightEdge = rowsEdges.[row] + let leftEdge, rightEdge = rowBoundaries.[row] for i in leftEdge .. rightEdge do itemTo queue i leftValues value @@ -352,14 +352,13 @@ module internal Kronecker = = let countZeroElementsInRow = - <@ fun (ndRange: Range1D) (row: int) (nnzInRow: int) (columnCount: int) (columns: ClArray) (rowsEdges: ClArray) (result: ClArray) -> + <@ fun (ndRange: Range1D) (firstIndex: int) (lastIndex: int) (columnCount: int) (columns: ClArray) (result: ClArray) -> let gid = ndRange.GlobalID0 - if gid <= nnzInRow then + let nnzInRow = lastIndex - firstIndex + 1 - let firstIndex = fst rowsEdges.[row] - let lastIndex = snd rowsEdges.[row] + if gid <= nnzInRow then if nnzInRow = 0 then result.[0] <- columnCount @@ -375,7 +374,7 @@ module internal Kronecker = let kernel = clContext.Compile countZeroElementsInRow - fun (queue: MailboxProcessor<_>) (matrix: CSR<_>) (rowsEdges: ClArray) (nnzInRows: int array) -> + fun (queue: MailboxProcessor<_>) (matrix: CSR<_>) (rowBoundaries: (int * int) array) -> let kernel = kernel.GetKernel() @@ -383,14 +382,18 @@ module internal Kronecker = for row in 0 .. 
matrix.RowCount - 1 do - let length = nnzInRows.[row] + 1 + let firstIndex = fst rowBoundaries.[row] + let lastIndex = snd rowBoundaries.[row] + + let nnzInRow = lastIndex - firstIndex + 1 + let length = nnzInRow + 1 let ndRange = Range1D.CreateValid(length, workGroupSize) let result = clContext.CreateClArrayWithSpecificAllocationMode(DeviceOnly, length) - queue.Post(Msg.MsgSetArguments(fun () -> kernel.KernelFunc ndRange row nnzInRows.[row] matrix.ColumnCount matrix.Columns rowsEdges result)) + queue.Post(Msg.MsgSetArguments(fun () -> kernel.KernelFunc ndRange firstIndex lastIndex matrix.ColumnCount matrix.Columns result)) queue.Post(Msg.CreateRunMsg<_, _>(kernel)) zeroCounts.[row] <- result @@ -405,8 +408,6 @@ module internal Kronecker = let pairwise = ClArray.pairwise clContext workGroupSize - let mapSubtract = ClArray.map <@ fun (a, b) -> b - a @> clContext workGroupSize - let mapInPlace = ClArray.mapInPlace <@ fun (a, b) -> (a, b - 1) @> clContext workGroupSize let countZeroElements = countZeroElements clContext workGroupSize @@ -441,18 +442,14 @@ module internal Kronecker = |> Option.defaultWith (fun () -> failwith "The state of the matrix is broken. 
The length of the rowPointers must be >= 2") - let nnzInRows = (mapSubtract queue DeviceOnly pairsOfRowPointers).ToHostAndFree queue - mapInPlace queue pairsOfRowPointers - let rowsEdges = pairsOfRowPointers - - let zeroCounts = countZeroElements queue matrixLeft rowsEdges nnzInRows + let rowBoundaries = pairsOfRowPointers.ToHostAndFree queue - let rowsEdges = rowsEdges.ToHostAndFree queue // TODO: get rid of transfer to host + let zeroCounts = countZeroElements queue matrixLeft rowBoundaries let startIndex = - insertNonZero queue rowsEdges matrixRight matrixLeft.Values leftColumns resultMatrix + insertNonZero queue rowBoundaries matrixRight matrixLeft.Values leftColumns resultMatrix matrixZero |> Option.iter (fun m -> insertZero queue startIndex zeroCounts m resultMatrix) From 2d2d0f0a9dd3c1650cf422a6362eba6b916117f2 Mon Sep 17 00:00:00 2001 From: artemiipatov Date: Sat, 2 Dec 2023 17:14:24 +0300 Subject: [PATCH 3/7] add: kronecker benchmarks --- .../GraphBLAS-sharp.Benchmarks.fsproj | 1 + .../Matrix/Kronecker.fs | 142 ++++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 benchmarks/GraphBLAS-sharp.Benchmarks/Matrix/Kronecker.fs diff --git a/benchmarks/GraphBLAS-sharp.Benchmarks/GraphBLAS-sharp.Benchmarks.fsproj b/benchmarks/GraphBLAS-sharp.Benchmarks/GraphBLAS-sharp.Benchmarks.fsproj index 6e8486b0..775f5d4f 100644 --- a/benchmarks/GraphBLAS-sharp.Benchmarks/GraphBLAS-sharp.Benchmarks.fsproj +++ b/benchmarks/GraphBLAS-sharp.Benchmarks/GraphBLAS-sharp.Benchmarks.fsproj @@ -23,6 +23,7 @@ + diff --git a/benchmarks/GraphBLAS-sharp.Benchmarks/Matrix/Kronecker.fs b/benchmarks/GraphBLAS-sharp.Benchmarks/Matrix/Kronecker.fs new file mode 100644 index 00000000..ce397d78 --- /dev/null +++ b/benchmarks/GraphBLAS-sharp.Benchmarks/Matrix/Kronecker.fs @@ -0,0 +1,142 @@ +module GraphBLAS.FSharp.Benchmarks.Matrix.SpGeMM.Expand + +open System.IO +open BenchmarkDotNet.Attributes +open Brahma.FSharp +open GraphBLAS.FSharp +open GraphBLAS.FSharp.IO +open 
GraphBLAS.FSharp.Backend.Quotes +open GraphBLAS.FSharp.Objects +open GraphBLAS.FSharp.Objects.ClContextExtensions +open GraphBLAS.FSharp.Benchmarks + +[] +[] +[] +[)>] +type Benchmarks<'elem when 'elem : struct>( + buildFunToBenchmark, + converter: string -> 'elem, + converterBool, + buildMatrix) = + + let mutable funToBenchmark = None + + let mutable matrix = Unchecked.defaultof> + + let mutable matrixHost = Unchecked.defaultof<_> + + member val ResultMatrix = Unchecked.defaultof option> with get, set + + [] + member val OclContextInfo = Unchecked.defaultof with get, set + + [] + member val InputMatrixReader = Unchecked.defaultof with get, set + + member this.OclContext: ClContext = (fst this.OclContextInfo).ClContext + member this.WorkGroupSize = snd this.OclContextInfo + + member this.Processor = + let p = (fst this.OclContextInfo).Queue + p.Error.Add(fun e -> failwithf "%A" e) + p + + static member AvailableContexts = Utils.availableContexts + + static member InputMatrixProviderBuilder pathToConfig = + let datasetFolder = "" + pathToConfig + |> Utils.getMatricesFilenames + |> Seq.map + (fun matrixFilename -> + printfn "%A" matrixFilename + + match Path.GetExtension matrixFilename with + | ".mtx" -> + MtxReader(Utils.getFullPathToMatrix datasetFolder matrixFilename) + | _ -> failwith "Unsupported matrix format") + + member this.FunToBenchmark = + match funToBenchmark with + | None -> + let x = buildFunToBenchmark this.OclContext this.WorkGroupSize + funToBenchmark <- Some x + x + | Some x -> x + + member this.ReadMatrix (reader: MtxReader) = + let converter = + match reader.Field with + | Pattern -> converterBool + | _ -> converter + + reader.ReadMatrix converter + + member this.Mxm() = + this.ResultMatrix <- this.FunToBenchmark this.Processor DeviceOnly matrix matrix + + member this.ClearInputMatrices() = + matrix.Dispose this.Processor + + member this.ClearResult() = + match this.ResultMatrix with + | Some matrix -> matrix.Dispose this.Processor + | None -> () 
+ + member this.ReadMatrices() = + matrixHost <- this.ReadMatrix this.InputMatrixReader + + member this.LoadMatricesToGPU () = + matrix <- buildMatrix this.OclContext matrixHost + + abstract member GlobalSetup : unit -> unit + + abstract member Benchmark : unit -> unit + + abstract member IterationCleanup : unit -> unit + + abstract member GlobalCleanup : unit -> unit + +module WithoutTransfer = + type Benchmark<'elem when 'elem : struct>( + buildFunToBenchmark, + converter: string -> 'elem, + converterBool, + buildMatrix) = + + inherit Benchmarks<'elem>( + buildFunToBenchmark, + converter, + converterBool, + buildMatrix) + + [] + override this.GlobalSetup() = + this.ReadMatrices() + this.LoadMatricesToGPU() + + [] + override this.Benchmark() = + this.Mxm() + this.Processor.PostAndReply(Msg.MsgNotifyMe) + + [] + override this.IterationCleanup () = + this.ClearResult() + + [] + override this.GlobalCleanup () = + this.ClearInputMatrices() + + type Float32() = + + inherit Benchmark( + Operations.kronecker (ArithmeticOperations.float32MulOption), + float32, + (fun _ -> Utils.nextSingle (System.Random())), + (fun context matrix -> ClMatrix.CSR <| matrix.ToCSR.ToDevice context) + ) + + static member InputMatrixProvider = + Benchmarks<_>.InputMatrixProviderBuilder "SpGeMM.txt" From ed6e5f2a78ac9fc3a77615eaebb92819548f921a Mon Sep 17 00:00:00 2001 From: artemiipatov Date: Tue, 5 Dec 2023 20:48:14 +0300 Subject: [PATCH 4/7] add: kronecker benchmark config --- benchmarks/GraphBLAS-sharp.Benchmarks/Configs/Kronecker.txt | 3 +++ benchmarks/GraphBLAS-sharp.Benchmarks/Matrix/Kronecker.fs | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 benchmarks/GraphBLAS-sharp.Benchmarks/Configs/Kronecker.txt diff --git a/benchmarks/GraphBLAS-sharp.Benchmarks/Configs/Kronecker.txt b/benchmarks/GraphBLAS-sharp.Benchmarks/Configs/Kronecker.txt new file mode 100644 index 00000000..b16b59a8 --- /dev/null +++ b/benchmarks/GraphBLAS-sharp.Benchmarks/Configs/Kronecker.txt @@ 
-0,0 +1,3 @@ +can_634 +Si2 +lshp1561 diff --git a/benchmarks/GraphBLAS-sharp.Benchmarks/Matrix/Kronecker.fs b/benchmarks/GraphBLAS-sharp.Benchmarks/Matrix/Kronecker.fs index ce397d78..a5e39576 100644 --- a/benchmarks/GraphBLAS-sharp.Benchmarks/Matrix/Kronecker.fs +++ b/benchmarks/GraphBLAS-sharp.Benchmarks/Matrix/Kronecker.fs @@ -139,4 +139,4 @@ module WithoutTransfer = ) static member InputMatrixProvider = - Benchmarks<_>.InputMatrixProviderBuilder "SpGeMM.txt" + Benchmarks<_>.InputMatrixProviderBuilder "Kronecker.txt" From 2cd153542d431579d47ffffd455a6756dbdadb31 Mon Sep 17 00:00:00 2001 From: artemiipatov Date: Tue, 12 Dec 2023 19:02:54 +0300 Subject: [PATCH 5/7] refactor: kronecker benchmarks --- benchmarks/GraphBLAS-sharp.Benchmarks/Matrix/Kronecker.fs | 2 +- benchmarks/GraphBLAS-sharp.Benchmarks/Program.fs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/GraphBLAS-sharp.Benchmarks/Matrix/Kronecker.fs b/benchmarks/GraphBLAS-sharp.Benchmarks/Matrix/Kronecker.fs index a5e39576..f8c7b880 100644 --- a/benchmarks/GraphBLAS-sharp.Benchmarks/Matrix/Kronecker.fs +++ b/benchmarks/GraphBLAS-sharp.Benchmarks/Matrix/Kronecker.fs @@ -1,4 +1,4 @@ -module GraphBLAS.FSharp.Benchmarks.Matrix.SpGeMM.Expand +module GraphBLAS.FSharp.Benchmarks.Matrix.Kronecker open System.IO open BenchmarkDotNet.Attributes diff --git a/benchmarks/GraphBLAS-sharp.Benchmarks/Program.fs b/benchmarks/GraphBLAS-sharp.Benchmarks/Program.fs index 5a3ccf37..4a0e897d 100644 --- a/benchmarks/GraphBLAS-sharp.Benchmarks/Program.fs +++ b/benchmarks/GraphBLAS-sharp.Benchmarks/Program.fs @@ -4,7 +4,7 @@ open BenchmarkDotNet.Running [] let main argv = let benchmarks = - BenchmarkSwitcher [| typeof |] + BenchmarkSwitcher [| typeof |] benchmarks.Run argv |> ignore 0 From cdacf37a5415c347a30dcc016319e67fa0145239 Mon Sep 17 00:00:00 2001 From: artemiipatov Date: Fri, 15 Dec 2023 20:57:53 +0300 Subject: [PATCH 6/7] refactor: benchmarks --- 
benchmarks/GraphBLAS-sharp.Benchmarks/Program.fs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/GraphBLAS-sharp.Benchmarks/Program.fs b/benchmarks/GraphBLAS-sharp.Benchmarks/Program.fs index 4a0e897d..5a3ccf37 100644 --- a/benchmarks/GraphBLAS-sharp.Benchmarks/Program.fs +++ b/benchmarks/GraphBLAS-sharp.Benchmarks/Program.fs @@ -4,7 +4,7 @@ open BenchmarkDotNet.Running [] let main argv = let benchmarks = - BenchmarkSwitcher [| typeof |] + BenchmarkSwitcher [| typeof |] benchmarks.Run argv |> ignore 0 From b8696b785ad8dd9fdbfc826cbae37bddc8d56d9c Mon Sep 17 00:00:00 2001 From: artemiipatov Date: Fri, 15 Dec 2023 21:11:49 +0300 Subject: [PATCH 7/7] refactor: formatting --- .../Operations/Kronecker.fs | 50 +++++++++++-------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/src/GraphBLAS-sharp.Backend/Operations/Kronecker.fs b/src/GraphBLAS-sharp.Backend/Operations/Kronecker.fs index de703962..8c5835ba 100644 --- a/src/GraphBLAS-sharp.Backend/Operations/Kronecker.fs +++ b/src/GraphBLAS-sharp.Backend/Operations/Kronecker.fs @@ -323,7 +323,8 @@ module internal Kronecker = let mutable startIndex = 0 - let value = clContext.CreateClCell Unchecked.defaultof<'a> + let value = + clContext.CreateClCell Unchecked.defaultof<'a> for row in 0 .. 
rowCount - 1 do let leftEdge, rightEdge = rowBoundaries.[row] @@ -346,31 +347,31 @@ module internal Kronecker = startIndex - let private countZeroElements - (clContext: ClContext) - workGroupSize - = + let private countZeroElements (clContext: ClContext) workGroupSize = let countZeroElementsInRow = <@ fun (ndRange: Range1D) (firstIndex: int) (lastIndex: int) (columnCount: int) (columns: ClArray) (result: ClArray) -> - let gid = ndRange.GlobalID0 + let gid = ndRange.GlobalID0 - let nnzInRow = lastIndex - firstIndex + 1 + let nnzInRow = lastIndex - firstIndex + 1 - if gid <= nnzInRow then + if gid <= nnzInRow then - if nnzInRow = 0 then - result.[0] <- columnCount + if nnzInRow = 0 then + result.[0] <- columnCount - elif gid = nnzInRow then + elif gid = nnzInRow then result.[nnzInRow] <- columnCount - columns.[lastIndex] - 1 - elif gid = 0 then - result.[0] <- columns.[firstIndex] + elif gid = 0 then + result.[0] <- columns.[firstIndex] - else - result.[gid] <- columns.[firstIndex + gid] - columns.[firstIndex + gid - 1] - 1 @> + else + result.[gid] <- + columns.[firstIndex + gid] + - columns.[firstIndex + gid - 1] + - 1 @> let kernel = clContext.Compile countZeroElementsInRow @@ -391,9 +392,15 @@ module internal Kronecker = let ndRange = Range1D.CreateValid(length, workGroupSize) - let result = clContext.CreateClArrayWithSpecificAllocationMode(DeviceOnly, length) + let result = + clContext.CreateClArrayWithSpecificAllocationMode(DeviceOnly, length) + + queue.Post( + Msg.MsgSetArguments + (fun () -> + kernel.KernelFunc ndRange firstIndex lastIndex matrix.ColumnCount matrix.Columns result) + ) - queue.Post(Msg.MsgSetArguments(fun () -> kernel.KernelFunc ndRange firstIndex lastIndex matrix.ColumnCount matrix.Columns result)) queue.Post(Msg.CreateRunMsg<_, _>(kernel)) zeroCounts.[row] <- result @@ -408,9 +415,11 @@ module internal Kronecker = let pairwise = ClArray.pairwise clContext workGroupSize - let mapInPlace = ClArray.mapInPlace <@ fun (a, b) -> (a, b - 1) @> 
clContext workGroupSize + let mapInPlace = + ClArray.mapInPlace <@ fun (a, b) -> (a, b - 1) @> clContext workGroupSize - let countZeroElements = countZeroElements clContext workGroupSize + let countZeroElements = + countZeroElements clContext workGroupSize let insertNonZero = insertNonZero clContext workGroupSize op @@ -446,7 +455,8 @@ module internal Kronecker = let rowBoundaries = pairsOfRowPointers.ToHostAndFree queue - let zeroCounts = countZeroElements queue matrixLeft rowBoundaries + let zeroCounts = + countZeroElements queue matrixLeft rowBoundaries let startIndex = insertNonZero queue rowBoundaries matrixRight matrixLeft.Values leftColumns resultMatrix