diff --git a/Src/ILGPU.Algorithms/IL/ILFunctions.cs b/Src/ILGPU.Algorithms/IL/ILFunctions.cs
deleted file mode 100644
index aa19cda85..000000000
--- a/Src/ILGPU.Algorithms/IL/ILFunctions.cs
+++ /dev/null
@@ -1,204 +0,0 @@
-// ---------------------------------------------------------------------------------------
-// ILGPU Algorithms
-// Copyright (c) 2021 ILGPU Project
-// www.ilgpu.net
-//
-// File: ILFunctions.cs
-//
-// This file is part of ILGPU and is distributed under the University of Illinois Open
-// Source License. See LICENSE.txt for details.
-// ---------------------------------------------------------------------------------------
-
-using ILGPU.Algorithms.ScanReduceOperations;
-using System;
-using System.Diagnostics;
-using System.Runtime.CompilerServices;
-
-namespace ILGPU.Algorithms.IL
-{
- ///
- /// Custom IL-specific implementations.
- ///
- static class ILFunctions
- {
- #region Nested Types
-
- public interface IILFunctionImplementation
- {
- ///
- /// The maximum number of supported thread per context on the CPU accelerator
- /// for the implementation of specific algorithms.
- ///
- int MaxNumThreads { get; }
-
- ///
- /// Returns true if the current thread is the first thread.
- ///
- bool IsFirstThread { get; }
-
- ///
- /// Returns the current linear thread index.
- ///
- int ThreadIndex { get; }
-
- ///
- /// Returns the linear thread dimension.
- ///
- int ThreadDimension { get; }
-
- ///
- /// The number of segments for reduce operations.
- ///
- int ReduceSegments { get; }
-
- ///
- /// The reduction segment index to write to and read from.
- ///
- int ReduceSegmentIndex { get; }
-
- ///
- /// Executes a barrier in the current context.
- ///
- void Barrier();
- }
-
- #endregion
-
- #region Reduce
-
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static T Reduce(T value)
- where T : unmanaged
- where TReduction : IScanReduceOperation
- where TImpl : struct, IILFunctionImplementation =>
- AllReduce(value);
-
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static T AllReduce(T value)
- where T : unmanaged
- where TReduction : IScanReduceOperation
- where TImpl : struct, IILFunctionImplementation
- {
- TImpl impl = default;
- var sharedMemory = SharedMemory.Allocate(impl.ReduceSegments);
-
- TReduction reduction = default;
- if (impl.IsFirstThread)
- sharedMemory[impl.ReduceSegmentIndex] = reduction.Identity;
- impl.Barrier();
-
- reduction.AtomicApply(ref sharedMemory[impl.ReduceSegmentIndex], value);
-
- impl.Barrier();
- return sharedMemory[impl.ReduceSegmentIndex];
- }
-
- #endregion
-
- #region Scan
-
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static T ExclusiveScan(T value)
- where T : unmanaged
- where TScanOperation : struct, IScanReduceOperation
- where TImpl : struct, IILFunctionImplementation =>
- ExclusiveScanWithBoundaries(value, out var _);
-
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static T InclusiveScan(T value)
- where T : unmanaged
- where TScanOperation : struct, IScanReduceOperation
- where TImpl : struct, IILFunctionImplementation =>
- InclusiveScanWithBoundaries(value, out var _);
-
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static T ExclusiveScanWithBoundaries(
- T value,
- out ScanBoundaries boundaries)
- where T : unmanaged
- where TScanOperation : struct, IScanReduceOperation
- where TImpl : struct, IILFunctionImplementation
- {
- TImpl impl = default;
- var sharedMemory = InclusiveScanImplementation(
- value);
- boundaries = new ScanBoundaries(
- sharedMemory[0],
- sharedMemory[Math.Max(0, impl.ThreadDimension - 2)]);
- return impl.IsFirstThread
- ? default(TScanOperation).Identity
- : sharedMemory[impl.ThreadIndex - 1];
- }
-
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static T InclusiveScanWithBoundaries(
- T value,
- out ScanBoundaries boundaries)
- where T : unmanaged
- where TScanOperation : struct, IScanReduceOperation
- where TImpl : struct, IILFunctionImplementation
- {
- TImpl impl = default;
- var sharedMemory = InclusiveScanImplementation(
- value);
- boundaries = new ScanBoundaries(
- sharedMemory[0],
- sharedMemory[impl.ThreadDimension - 1]);
- return sharedMemory[impl.ThreadIndex];
- }
-
- ///
- /// Performs a group-wide inclusive scan.
- ///
- /// The element type.
- /// The type of the warp scan logic.
- /// The internal implementation type.
- /// The value to scan.
- /// The resulting value for the current lane.
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- private static ArrayView InclusiveScanImplementation<
- T,
- TScanOperation,
- TImpl>(
- T value)
- where T : unmanaged
- where TScanOperation : struct, IScanReduceOperation
- where TImpl : struct, IILFunctionImplementation
- {
- TImpl impl = default;
-
- // Load values into shared memory
- var sharedMemory = SharedMemory.Allocate(impl.MaxNumThreads);
- Debug.Assert(
- impl.ThreadDimension <= impl.MaxNumThreads,
- "Invalid group/warp size");
- sharedMemory[impl.ThreadIndex] = value;
- impl.Barrier();
-
- // First thread performs all operations
- if (impl.IsFirstThread)
- {
- TScanOperation scanOperation = default;
- for (int i = 1; i < impl.ThreadDimension; ++i)
- {
- sharedMemory[i] = scanOperation.Apply(
- sharedMemory[i - 1],
- sharedMemory[i]);
- }
- }
- impl.Barrier();
-
- return sharedMemory;
- }
-
- #endregion
- }
-}
diff --git a/Src/ILGPU.Algorithms/IL/ILGroupExtensions.cs b/Src/ILGPU.Algorithms/IL/ILGroupExtensions.cs
index af0d43fad..f4f1db1db 100644
--- a/Src/ILGPU.Algorithms/IL/ILGroupExtensions.cs
+++ b/Src/ILGPU.Algorithms/IL/ILGroupExtensions.cs
@@ -1,6 +1,6 @@
// ---------------------------------------------------------------------------------------
// ILGPU Algorithms
-// Copyright (c) 2019-2021 ILGPU Project
+// Copyright (c) 2019-2022 ILGPU Project
// www.ilgpu.net
//
// File: ILGroupExtensions.cs
@@ -10,8 +10,9 @@
// ---------------------------------------------------------------------------------------
using ILGPU.Algorithms.ScanReduceOperations;
+using System;
+using System.Diagnostics;
using System.Runtime.CompilerServices;
-using static ILGPU.Algorithms.IL.ILFunctions;
namespace ILGPU.Algorithms.IL
{
@@ -20,54 +21,6 @@ namespace ILGPU.Algorithms.IL
///
static class ILGroupExtensions
{
- #region Nested Types
-
- ///
- /// Implements ILFunctions for groups.
- ///
- private readonly struct GroupImplementation : IILFunctionImplementation
- {
- ///
- /// Returns 1024.
- ///
- ///
- /// TODO: refine the implementation to avoid a hard-coded constant.
- ///
- public readonly int MaxNumThreads => 1024;
-
- ///
- /// Returns true if this is the first group thread.
- ///
- public readonly bool IsFirstThread => Group.IsFirstThread;
-
- ///
- /// Returns current linear group index.
- ///
- public readonly int ThreadIndex => Group.LinearIndex;
-
- ///
- /// Returns the linear group dimension.
- ///
- public readonly int ThreadDimension => Group.Dimension.Size;
-
- ///
- /// Returns 1.
- ///
- public readonly int ReduceSegments => 1;
-
- ///
- /// Returns 0.
- ///
- public readonly int ReduceSegmentIndex => 0;
-
- ///
- /// Performs a group-wide barrier.
- ///
- public readonly void Barrier() => Group.Barrier();
- }
-
- #endregion
-
#region Reduce
///
@@ -81,8 +34,23 @@ public static T Reduce(T value)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static T AllReduce(T value)
where T : unmanaged
- where TReduction : IScanReduceOperation =>
- AllReduce(value);
+ where TReduction : IScanReduceOperation
+ {
+ TReduction reduction = default;
+
+ ref var sharedMemory = ref SharedMemory.Allocate();
+ if (Group.IsFirstThread)
+ sharedMemory = reduction.Identity;
+ Group.Barrier();
+
+ // Reduce within each warp first; only the first lane of a warp contributes below
+ var firstLaneReduced = ILWarpExtensions.Reduce(value);
+ if (Warp.IsFirstLane)
+ reduction.AtomicApply(ref sharedMemory, firstLaneReduced);
+
+ Group.Barrier();
+ return sharedMemory;
+ }
#endregion
@@ -93,14 +61,14 @@ public static T AllReduce(T value)
public static T ExclusiveScan(T value)
where T : unmanaged
where TScanOperation : struct, IScanReduceOperation =>
- ExclusiveScan(value);
+ ExclusiveScanWithBoundaries(value, out var _);
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static T InclusiveScan(T value)
where T : unmanaged
where TScanOperation : struct, IScanReduceOperation =>
- InclusiveScan(value);
+ InclusiveScanWithBoundaries(value, out var _);
///
@@ -109,10 +77,16 @@ public static T ExclusiveScanWithBoundaries(
T value,
out ScanBoundaries boundaries)
where T : unmanaged
- where TScanOperation : struct, IScanReduceOperation =>
- ExclusiveScanWithBoundaries(
- value,
- out boundaries);
+ where TScanOperation : struct, IScanReduceOperation
+ {
+ var sharedMemory = InclusiveScanImplementation(value);
+ boundaries = new ScanBoundaries(
+ sharedMemory[0],
+ sharedMemory[Math.Max(0, Group.Dimension.Size - 2)]);
+ return Group.IsFirstThread
+ ? default(TScanOperation).Identity
+ : sharedMemory[Group.LinearIndex - 1];
+ }
///
@@ -121,10 +95,54 @@ public static T InclusiveScanWithBoundaries(
T value,
out ScanBoundaries boundaries)
where T : unmanaged
- where TScanOperation : struct, IScanReduceOperation =>
- InclusiveScanWithBoundaries(
- value,
- out boundaries);
+ where TScanOperation : struct, IScanReduceOperation
+ {
+ var sharedMemory = InclusiveScanImplementation(
+ value);
+ boundaries = new ScanBoundaries(
+ sharedMemory[0],
+ sharedMemory[Group.Dimension.Size - 1]);
+ return sharedMemory[Group.LinearIndex];
+ }
+
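+ /// <summary>
+ /// Performs a group-wide inclusive scan in shared memory.
+ /// </summary>
+ /// <typeparam name="T">The element type.</typeparam>
+ /// <typeparam name="TScanOperation">The type of the scan logic.</typeparam>
+ /// <param name="value">The value contributed by the current thread.</param>
+ /// <returns>The shared-memory view containing all scanned values.</returns>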
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static ArrayView InclusiveScanImplementation(
+ T value)
+ where T : unmanaged
+ where TScanOperation : struct, IScanReduceOperation
+ {
+ const int MaxNumThreads = 2048;
+
+ // Load values into shared memory
+ var sharedMemory = SharedMemory.Allocate(MaxNumThreads);
+ Debug.Assert(
+ Group.Dimension.Size <= MaxNumThreads,
+ "Invalid group/warp size");
+ sharedMemory[Group.LinearIndex] = value;
+ Group.Barrier();
+
+ // The first thread scans all values sequentially in place; others wait at the barrier
+ if (Group.IsFirstThread)
+ {
+ TScanOperation scanOperation = default;
+ for (int i = 1; i < Group.Dimension.Size; ++i)
+ {
+ sharedMemory[i] = scanOperation.Apply(
+ sharedMemory[i - 1],
+ sharedMemory[i]);
+ }
+ }
+ Group.Barrier();
+
+ return sharedMemory;
+ }
///
/// Prepares for the next iteration of a group-wide exclusive scan within the
diff --git a/Src/ILGPU.Algorithms/IL/ILWarpExtensions.cs b/Src/ILGPU.Algorithms/IL/ILWarpExtensions.cs
index 7b1b56069..f4f987400 100644
--- a/Src/ILGPU.Algorithms/IL/ILWarpExtensions.cs
+++ b/Src/ILGPU.Algorithms/IL/ILWarpExtensions.cs
@@ -1,6 +1,6 @@
// ---------------------------------------------------------------------------------------
// ILGPU Algorithms
-// Copyright (c) 2019-2021 ILGPU Project
+// Copyright (c) 2019-2022 ILGPU Project
// www.ilgpu.net
//
// File: ILWarpExtensions.cs
@@ -9,65 +9,18 @@
// Source License. See LICENSE.txt for details.
// ---------------------------------------------------------------------------------------
+using ILGPU.Algorithms.PTX;
using ILGPU.Algorithms.ScanReduceOperations;
using System.Runtime.CompilerServices;
-using static ILGPU.Algorithms.IL.ILFunctions;
namespace ILGPU.Algorithms.IL
{
///
- /// Custom IL-specific implementations.
+ /// Custom IL-specific implementations that fall back to PTX-specific implementations
+ /// as the CPU runtime is fully compatible with the PTX runtime.
///
static class ILWarpExtensions
{
- #region Nested Types
-
- ///
- /// Implements ILFunctions for warps.
- ///
- private readonly struct WarpImplementation : IILFunctionImplementation
- {
- ///
- /// Returns 256.
- ///
- ///
- /// TODO: refine the implementation to avoid a hard-coded constant.
- ///
- public readonly int MaxNumThreads => 256;
-
- ///
- /// Returns true if this is the first warp thread.
- ///
- public readonly bool IsFirstThread => Warp.IsFirstLane;
-
- ///
- /// Returns current lane index.
- ///
- public readonly int ThreadIndex => Warp.LaneIdx;
-
- ///
- /// Returns the warp size.
- ///
- public readonly int ThreadDimension => Warp.WarpSize;
-
- ///
- /// Returns the number of warps per group.
- ///
- public readonly int ReduceSegments => MaxNumThreads / Warp.WarpSize;
-
- ///
- /// Returns the current warp index.
- ///
- public readonly int ReduceSegmentIndex => Warp.WarpIdx;
-
- ///
- /// Performs a warp-wide barrier.
- ///
- public readonly void Barrier() => Warp.Barrier();
- }
-
- #endregion
-
#region Reduce
///
@@ -75,14 +28,14 @@ static class ILWarpExtensions
public static T Reduce(T value)
where T : unmanaged
where TReduction : IScanReduceOperation =>
- AllReduce(value);
+ PTXWarpExtensions.Reduce(value);
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static T AllReduce(T value)
where T : unmanaged
where TReduction : IScanReduceOperation =>
- AllReduce(value);
+ PTXWarpExtensions.AllReduce(value);
#endregion
@@ -93,14 +46,14 @@ public static T AllReduce(T value)
public static T ExclusiveScan(T value)
where T : unmanaged
where TScanOperation : struct, IScanReduceOperation =>
- ExclusiveScan(value);
+ PTXWarpExtensions.ExclusiveScan(value);
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static T InclusiveScan(T value)
where T : unmanaged
where TScanOperation : struct, IScanReduceOperation =>
- InclusiveScan(value);
+ PTXWarpExtensions.InclusiveScan(value);
#endregion
}