diff --git a/Samples/AlgorithmsSparseMatrix/AlgorithmsSparseMatrix.csproj b/Samples/AlgorithmsSparseMatrix/AlgorithmsSparseMatrix.csproj new file mode 100644 index 000000000..53656165d --- /dev/null +++ b/Samples/AlgorithmsSparseMatrix/AlgorithmsSparseMatrix.csproj @@ -0,0 +1,17 @@ + + + $(LibrarySamplesTargetFrameworks) + Exe + 8.0 + + + + true + AllEnabledByDefault + + + + + + + diff --git a/Samples/AlgorithmsSparseMatrix/Program.cs b/Samples/AlgorithmsSparseMatrix/Program.cs new file mode 100644 index 000000000..98490df96 --- /dev/null +++ b/Samples/AlgorithmsSparseMatrix/Program.cs @@ -0,0 +1,195 @@ +// --------------------------------------------------------------------------------------- +// ILGPU Samples +// Copyright (c) 2023 ILGPU Project +// www.ilgpu.net +// +// File: Program.cs +// +// This file is part of ILGPU and is distributed under the University of Illinois Open +// Source License. See LICENSE.txt for details. +// --------------------------------------------------------------------------------------- + +using ILGPU; +using ILGPU.Algorithms; +using ILGPU.Algorithms.MatrixOperations; +using ILGPU.Runtime; +using System; + +#pragma warning disable CA5394 // Insecure RNG + +namespace AlgorithmsSparseMatrix +{ +#if NET6_0_OR_GREATER + class Program + { + /// + /// Converts the given dense matrix into its sparse form. + /// + static SparseMatrixView Sparsify( + Random random, + Accelerator accelerator, + int length) + { + // Setup a sparse 2D matrix + var matrix = new float[length, length]; + + // Fill sparse matrix + for (int i = 0; i < matrix.GetLength(0); ++i) + { + for (int j = 0; j < matrix.GetLength(1); ++j) + { + // Create a sparse matrix with 5% sparse elements in this sample + matrix[i, j] = random.NextSingle() > 0.05f + ? 
0.0f + : random.NextSingle() * 10.0f; + } + } + + // Allocate basic matrix on the accelerator and transfer it to the device + using var matrixBuffer = accelerator.Allocate2DDenseY(matrix.GetExtent()); + matrixBuffer.View.CopyFromCPU(matrix); + + // Allocate a temp buffer (or use existing memory from somewhere else) + using var tempBuffer = accelerator.Allocate1D(1); + + // Initialize the basic shape converter and data converters + var shapeConverter = accelerator.CreateSparseMatrixShapeConverter< + float, + FloatEpsPredicate, + Stride2D.General>(tempBuffer.View); + var converter = accelerator.CreateSparseMatrixConverter(); + + // Get basic shape of the sparse matrix living on the device which contains all required + // dimension information and the actual sparse lookup table for efficient processing + // the matrix elements later on + var numNeighborsBuffer = accelerator.Allocate1D(matrix.GetLength(0)); + var shapeView = shapeConverter( + accelerator.DefaultStream, + matrixBuffer.View.AsGeneral(), + new FloatEpsPredicate( + matrixBuffer.View.AsGeneral(), + 0.0f), + numNeighborsBuffer.View, + maxNumNeighbors => + { + // The maximum number of neighbors per row is available at this point and we just + // allocate a buffer here for demonstration purposes. In practice, this can be + // the creation of a subview from an existing buffer. + return accelerator.Allocate2DDenseY( + (matrix.GetLength(0), maxNumNeighbors)) + .View.AsGeneral(); + }); + + // Allocate the actual sparse data buffer for our result + var sparseMatrixBuffer = accelerator.Allocate2DDenseY( + (matrix.GetLength(0), shapeView.Neighbors.Extent.Y)); + + // Convert data and fill our sparse matrix structure + var sparseView = converter(accelerator.DefaultStream, matrixBuffer.View.AsGeneral(), + shapeView, sparseMatrixBuffer.View.AsGeneral()); + + // !!! Note that we *do not* dispose buffers here to keep them alive in the related + // views for the sake of simplicity. 
Please always make sure to dispose buffers + // properly in production code !!! + + // Sparse view now contains all required data elements + return sparseView; + } + + /// + /// Multiplies the given dense matrix and the sparse matrix efficiently on the GPU, + /// while transposing the sparse matrix on the fly. + /// + static void MultiplySparseTransposed( + Accelerator accelerator, + float[,] denseMatrix, + SparseMatrixView sparseView) + { + // As mentioned above, the integrated sparse-matrix processor allows multiplying + // the dense matrix with the sparse one while transposing the latter one. + // However, it also lets us specify which values we are interested in and + // ignore all other values. For this purpose, we can use specialized predicates, + // of which several are already predefined and available. In this sample, we use + // a predicate that operates on a dense matrix to test whether the values are above + // a certain threshold. If yes, the corresponding matrix element in the result + // matrix will be computed. 
+ + var maskMatrix = new float[denseMatrix.GetLength(0), denseMatrix.GetLength(1)]; + for (int i = 0; i < maskMatrix.GetLength(0); ++i) + { + for (int j = 0; j < maskMatrix.GetLength(1); ++j) + maskMatrix[i, j] = 1.0f; // Use your own values to avoid computing result elements + } + + // Allocate the dense input, mask and output matrices + using var aMatrixBuffer = accelerator.Allocate2DDenseY(denseMatrix); + using var pMatrixBuffer = accelerator.Allocate2DDenseY(maskMatrix); + using var outBuffer = accelerator.Allocate2DDenseY(denseMatrix.GetExtent()); + + // Create a single-streamed sparse matrix processor to multiply our matrix instances + // as efficiently as possible + var processor = accelerator.CreateSparseTransposedMatrixMultiplierMasked< + float, + FloatEpsPredicate, + Stride2D.General, + FloatMaskedSparseMatrixProcessor>(); + + // Multiply a single masked sparse matrix + processor( + accelerator.DefaultStream, + new FloatEpsPredicate(pMatrixBuffer.View.AsGeneral(), 0.0f), + aMatrixBuffer.View.AsGeneral(), + sparseView, + outBuffer.View.AsGeneral()); + + // The outBuffer contains the multiplication result + } + + static void Main() + { + // Get a new ILGPU context + using var context = + Context.Create(builder => builder.Default().EnableAlgorithms()); + + // Create a new RNG on the CPU side + var random = new Random(); + + const int lengthA = 288; + const int lengthB = 376; + + // For each available device... 
+ foreach (var device in context) + { + // Create the associated accelerator + using var accelerator = device.CreateAccelerator(context); + Console.WriteLine($"Performing operations on {accelerator}"); + + // Create a new dense matrix on the CPU and let it be sparsified on the GPU + var sparseMatrixView = Sparsify(random, accelerator, lengthB); + + // Now, use the sparse matrix and multiply it efficiently with the given dense one + var denseMatrix = new float[lengthA, lengthB]; + for (int i = 0; i < denseMatrix.GetLength(0); ++i) + { + for (int j = 0; j < denseMatrix.GetLength(1); ++j) + denseMatrix[i, j] = random.NextSingle(); + } + + // Note that this sample method demonstrates the use of a specialized operation: + // A * B^T, where B is considered a huge sparse matrix + MultiplySparseTransposed(accelerator, denseMatrix, sparseMatrixView); + } + } + } +#else + class Program + { + static void Main() + { + Console.WriteLine("Cannot use this sample on frameworks prior to .Net6.0"); + } + } +#endif +} + +#pragma warning restore CA5394 diff --git a/Samples/ILGPU.Samples.sln b/Samples/ILGPU.Samples.sln index 1651504ce..827759bba 100644 --- a/Samples/ILGPU.Samples.sln +++ b/Samples/ILGPU.Samples.sln @@ -127,6 +127,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "MemoryBufferStrides", "Memo EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "StaticAbstractInterfaceMembers", "StaticAbstractInterfaceMembers\StaticAbstractInterfaceMembers.csproj", "{28FD07DE-7B7D-46C3-9EE1-5D50C0E4F126}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AlgorithmsSparseMatrix", "AlgorithmsSparseMatrix\AlgorithmsSparseMatrix.csproj", "{FBC7F8FA-8EB1-44EA-969E-B3DD365627ED}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -357,6 +359,10 @@ Global {28FD07DE-7B7D-46C3-9EE1-5D50C0E4F126}.Debug|Any CPU.Build.0 = Debug|Any CPU {28FD07DE-7B7D-46C3-9EE1-5D50C0E4F126}.Release|Any CPU.ActiveCfg = 
Release|Any CPU {28FD07DE-7B7D-46C3-9EE1-5D50C0E4F126}.Release|Any CPU.Build.0 = Release|Any CPU + {FBC7F8FA-8EB1-44EA-969E-B3DD365627ED}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {FBC7F8FA-8EB1-44EA-969E-B3DD365627ED}.Debug|Any CPU.Build.0 = Debug|Any CPU + {FBC7F8FA-8EB1-44EA-969E-B3DD365627ED}.Release|Any CPU.ActiveCfg = Release|Any CPU + {FBC7F8FA-8EB1-44EA-969E-B3DD365627ED}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -421,6 +427,7 @@ Global {1A909DA2-15AE-466F-8BBE-C3F676C39812} = {30F390DB-B823-40A2-A881-382B9EF36C07} {2EF99A5B-9AAE-44A8-BB41-923DF66A7EAB} = {C1D99632-ED4A-4B08-A14D-4C8DB375934F} {28FD07DE-7B7D-46C3-9EE1-5D50C0E4F126} = {C1D99632-ED4A-4B08-A14D-4C8DB375934F} + {FBC7F8FA-8EB1-44EA-969E-B3DD365627ED} = {25BA2234-5778-40BC-9386-9CE87AB87D1F} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {30E502BD-3826-417F-888F-1CE19CF5C6DA}