diff --git a/Samples/AlgorithmsSparseMatrix/AlgorithmsSparseMatrix.csproj b/Samples/AlgorithmsSparseMatrix/AlgorithmsSparseMatrix.csproj
new file mode 100644
index 000000000..53656165d
--- /dev/null
+++ b/Samples/AlgorithmsSparseMatrix/AlgorithmsSparseMatrix.csproj
@@ -0,0 +1,17 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <TargetFrameworks>$(LibrarySamplesTargetFrameworks)</TargetFrameworks>
+    <OutputType>Exe</OutputType>
+    <LangVersion>8.0</LangVersion>
+  </PropertyGroup>
+
+  <PropertyGroup>
+    <EnableNETAnalyzers>true</EnableNETAnalyzers>
+    <AnalysisMode>AllEnabledByDefault</AnalysisMode>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <ProjectReference Include="..\..\Src\ILGPU\ILGPU.csproj" />
+    <ProjectReference Include="..\..\Src\ILGPU.Algorithms\ILGPU.Algorithms.csproj" />
+  </ItemGroup>
+</Project>
diff --git a/Samples/AlgorithmsSparseMatrix/Program.cs b/Samples/AlgorithmsSparseMatrix/Program.cs
new file mode 100644
index 000000000..98490df96
--- /dev/null
+++ b/Samples/AlgorithmsSparseMatrix/Program.cs
@@ -0,0 +1,195 @@
+﻿// ---------------------------------------------------------------------------------------
+//                                    ILGPU Samples
+//                           Copyright (c) 2023 ILGPU Project
+//                                    www.ilgpu.net
+//
+// File: Program.cs
+//
+// This file is part of ILGPU and is distributed under the University of Illinois Open
+// Source License. See LICENSE.txt for details.
+// ---------------------------------------------------------------------------------------
+
+using ILGPU;
+using ILGPU.Algorithms;
+using ILGPU.Algorithms.MatrixOperations;
+using ILGPU.Runtime;
+using System;
+
+#pragma warning disable CA5394 // Insecure RNG
+
+namespace AlgorithmsSparseMatrix
+{
+#if NET6_0_OR_GREATER
+    class Program
+    {
+        /// <summary>
+        /// Builds a random, mostly zero square matrix and converts it into its sparse form.
+        /// </summary>
+        static SparseMatrixView<float, Stride2D.General> Sparsify(
+            Random random,
+            Accelerator accelerator,
+            int length)
+        {
+            // Set up a dense 2D array on the CPU that will hold mostly zeros
+            var matrix = new float[length, length];
+
+            // Fill the matrix with random values
+            for (int i = 0; i < matrix.GetLength(0); ++i)
+            {
+                for (int j = 0; j < matrix.GetLength(1); ++j)
+                {
+                    // Roughly 5% of the elements get a random value in [0, 10); all others are zero
+                    matrix[i, j] = random.NextSingle() > 0.05f
+                        ? 0.0f
+                        : random.NextSingle() * 10.0f;
+                }
+            }
+
+            // Allocate the dense matrix on the accelerator and copy the CPU data into it
+            using var matrixBuffer = accelerator.Allocate2DDenseY<float>(matrix.GetExtent());
+            matrixBuffer.View.CopyFromCPU(matrix);
+
+            // Allocate a single-element temp buffer (or use existing memory from somewhere else)
+            using var tempBuffer = accelerator.Allocate1D<int>(1);
+
+            // Create the shape converter (which uses the temp buffer) and the data converter
+            var shapeConverter = accelerator.CreateSparseMatrixShapeConverter<
+                float,
+                FloatEpsPredicate<Stride2D.General>,
+                Stride2D.General>(tempBuffer.View);
+            var converter = accelerator.CreateSparseMatrixConverter<float, Stride2D.General>();
+
+            // Determine the shape of the sparse matrix on the device. The shape contains all
+            // required dimension information and the sparse lookup table that is used to process
+            // the matrix elements efficiently later on
+            var numNeighborsBuffer = accelerator.Allocate1D<int>(matrix.GetLength(0));
+            var shapeView = shapeConverter(
+                accelerator.DefaultStream,
+                matrixBuffer.View.AsGeneral(),
+                new FloatEpsPredicate<Stride2D.General>(
+                    matrixBuffer.View.AsGeneral(),
+                    0.0f),
+                numNeighborsBuffer.View,
+                maxNumNeighbors =>
+                {
+                    // The maximum number of neighbors per row is available at this point and we just
+                    // allocate a buffer here for demonstration purposes. In practice, this can be
+                    // the creation of a subview from an existing buffer.
+                    return accelerator.Allocate2DDenseY<int>(
+                            (matrix.GetLength(0), maxNumNeighbors))
+                        .View.AsGeneral();
+                });
+
+            // Allocate the actual sparse data buffer for our result
+            var sparseMatrixBuffer = accelerator.Allocate2DDenseY<float>(
+                (matrix.GetLength(0), shapeView.Neighbors.Extent.Y));
+
+            // Convert data and fill our sparse matrix structure
+            var sparseView = converter(accelerator.DefaultStream, matrixBuffer.View.AsGeneral(),
+                shapeView, sparseMatrixBuffer.View.AsGeneral());
+
+            // !!! Note that the buffers passed into the shape and sparse views (numNeighborsBuffer,
+            // the neighbors buffer and sparseMatrixBuffer) are intentionally *not* disposed here for
+            // simplicity. Please always make sure to dispose buffers properly in production code !!!
+
+            // Sparse view now contains all required data elements
+            return sparseView;
+        }
+
+        /// <summary>
+        /// Multiplies the given dense matrix and the sparse matrix on the given accelerator,
+        /// while transposing the sparse matrix on the fly.
+        /// </summary>
+        static void MultiplySparseTransposed(
+            Accelerator accelerator,
+            float[,] denseMatrix,
+            SparseMatrixView<float, Stride2D.General> sparseView)
+        {
+            // The integrated sparse-matrix processor allows multiplying the dense matrix with the
+            // sparse one while transposing the latter. It also allows us to specify which values
+            // we are interested in while ignoring all other values. For this purpose, we can use
+            // specialized predicates, of which several are already predefined and available. In
+            // this sample, we use a predicate that operates on a dense matrix to test whether the
+            // values are above a certain threshold. If so, the corresponding matrix element in the
+            // result matrix will be computed. Here, the mask matrix is filled with ones and the
+            // threshold passed to the predicate is 0.
+
+            var maskMatrix = new float[denseMatrix.GetLength(0), denseMatrix.GetLength(1)];
+            for (int i = 0; i < maskMatrix.GetLength(0); ++i)
+            {
+                for (int j = 0; j < maskMatrix.GetLength(1); ++j)
+                    maskMatrix[i, j] = 1.0f; // Use your own values to avoid computing result elements
+            }
+
+            // Allocate buffers for the dense input, the mask and the dense output matrix
+            using var aMatrixBuffer = accelerator.Allocate2DDenseY(denseMatrix);
+            using var pMatrixBuffer = accelerator.Allocate2DDenseY(maskMatrix);
+            using var outBuffer = accelerator.Allocate2DDenseY<float>(denseMatrix.GetExtent());
+
+            // Create a single-streamed sparse matrix processor to multiply our matrix instances
+            // as efficiently as possible
+            var processor = accelerator.CreateSparseTransposedMatrixMultiplierMasked<
+                float,
+                FloatEpsPredicate<Stride2D.General>,
+                Stride2D.General,
+                FloatMaskedSparseMatrixProcessor>();
+
+            // Multiply the dense matrix with the sparse one using the mask predicate
+            processor(
+                accelerator.DefaultStream,
+                new FloatEpsPredicate<Stride2D.General>(pMatrixBuffer.View.AsGeneral(), 0.0f),
+                aMatrixBuffer.View.AsGeneral(),
+                sparseView,
+                outBuffer.View.AsGeneral());
+
+            // The outBuffer contains the multiplication result
+        }
+
+        static void Main()
+        {
+            // Get a new ILGPU context with the algorithms library enabled
+            using var context =
+                Context.Create(builder => builder.Default().EnableAlgorithms());
+
+            // Create a new RNG on the CPU side
+            var random = new Random();
+
+            const int lengthA = 288;
+            const int lengthB = 376;
+
+            // For each available device...
+            foreach (var device in context)
+            {
+                // Create the associated accelerator
+                using var accelerator = device.CreateAccelerator(context);
+                Console.WriteLine($"Performing operations on {accelerator}");
+
+                // Create a new dense matrix on the CPU and let it be sparsified on the accelerator
+                var sparseMatrixView = Sparsify(random, accelerator, lengthB);
+
+                // Now, use the sparse matrix and multiply it efficiently with the given dense one
+                var denseMatrix = new float[lengthA, lengthB];
+                for (int i = 0; i < denseMatrix.GetLength(0); ++i)
+                {
+                    for (int j = 0; j < denseMatrix.GetLength(1); ++j)
+                        denseMatrix[i, j] = random.NextSingle();
+                }
+
+                // Note that this sample method demonstrates the use of a specialized operation:
+                // A * B^T, where B is considered a huge sparse matrix
+                MultiplySparseTransposed(accelerator, denseMatrix, sparseMatrixView);
+            }
+        }
+    }
+#else
+    class Program
+    {
+        // Fallback entry point used when NET6_0_OR_GREATER is not defined for the target.
+        static void Main() =>
+            Console.WriteLine(
+                "Cannot use this sample on frameworks prior to .Net6.0");
+    }
+#endif
+}
+
+#pragma warning restore CA5394
diff --git a/Samples/ILGPU.Samples.sln b/Samples/ILGPU.Samples.sln
index 1651504ce..827759bba 100644
--- a/Samples/ILGPU.Samples.sln
+++ b/Samples/ILGPU.Samples.sln
@@ -127,6 +127,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "MemoryBufferStrides", "Memo
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "StaticAbstractInterfaceMembers", "StaticAbstractInterfaceMembers\StaticAbstractInterfaceMembers.csproj", "{28FD07DE-7B7D-46C3-9EE1-5D50C0E4F126}"
 EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AlgorithmsSparseMatrix", "AlgorithmsSparseMatrix\AlgorithmsSparseMatrix.csproj", "{FBC7F8FA-8EB1-44EA-969E-B3DD365627ED}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -357,6 +359,10 @@ Global
 		{28FD07DE-7B7D-46C3-9EE1-5D50C0E4F126}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{28FD07DE-7B7D-46C3-9EE1-5D50C0E4F126}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{28FD07DE-7B7D-46C3-9EE1-5D50C0E4F126}.Release|Any CPU.Build.0 = Release|Any CPU
+		{FBC7F8FA-8EB1-44EA-969E-B3DD365627ED}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{FBC7F8FA-8EB1-44EA-969E-B3DD365627ED}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{FBC7F8FA-8EB1-44EA-969E-B3DD365627ED}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{FBC7F8FA-8EB1-44EA-969E-B3DD365627ED}.Release|Any CPU.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
@@ -421,6 +427,7 @@ Global
 		{1A909DA2-15AE-466F-8BBE-C3F676C39812} = {30F390DB-B823-40A2-A881-382B9EF36C07}
 		{2EF99A5B-9AAE-44A8-BB41-923DF66A7EAB} = {C1D99632-ED4A-4B08-A14D-4C8DB375934F}
 		{28FD07DE-7B7D-46C3-9EE1-5D50C0E4F126} = {C1D99632-ED4A-4B08-A14D-4C8DB375934F}
+		{FBC7F8FA-8EB1-44EA-969E-B3DD365627ED} = {25BA2234-5778-40BC-9386-9CE87AB87D1F}
 	EndGlobalSection
 	GlobalSection(ExtensibilityGlobals) = postSolution
 		SolutionGuid = {30E502BD-3826-417F-888F-1CE19CF5C6DA}