diff --git a/Src/ILGPU/Backends/PTX/PTXBackend.cs b/Src/ILGPU/Backends/PTX/PTXBackend.cs index 746572f9a..c1a8da8fe 100644 --- a/Src/ILGPU/Backends/PTX/PTXBackend.cs +++ b/Src/ILGPU/Backends/PTX/PTXBackend.cs @@ -168,7 +168,7 @@ protected override StringBuilder CreateKernelBuilder( ? PointerAlignments.Create( backendContext.KernelMethod, DefaultGlobalMemoryAlignment) - : PointerAlignments.Empty; + : null; data = new PTXCodeGenerator.GeneratorArgs( this, diff --git a/Src/ILGPU/Backends/PTX/PTXCodeGenerator.Emitter.cs b/Src/ILGPU/Backends/PTX/PTXCodeGenerator.Emitter.cs index 2a5309328..667742171 100644 --- a/Src/ILGPU/Backends/PTX/PTXCodeGenerator.Emitter.cs +++ b/Src/ILGPU/Backends/PTX/PTXCodeGenerator.Emitter.cs @@ -659,21 +659,27 @@ public void EmitComplexCommandWithOffsets( /// to resolve the correct offset in bytes within a structure. /// /// The emitter type. + /// The pointer to get the alignment for. + /// The safe minimum alignment in bytes. /// The generic command to emit. /// The current emitter. /// The involved register. - /// The base alignment in bytes. 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public void EmitVectorizedCommand( + Value pointerValue, + int safeAlignment, string command, in TEmitter emitter, - Register register, - int alignment) + Register register) where TEmitter : IVectorizedCommandEmitter { - if (register is CompoundRegister compoundRegister) + if (PointerAlignments != null && + register is CompoundRegister compoundRegister) { // Check the provided alignment value to create vectorized instructions + int alignment = PointerAlignments.GetAlignment( + pointerValue, + safeAlignment); var ranges = compoundRegister.Type.VectorizableFields; for (int i = 0, e = ranges.Count; i < e; ++i) { diff --git a/Src/ILGPU/Backends/PTX/PTXCodeGenerator.Values.cs b/Src/ILGPU/Backends/PTX/PTXCodeGenerator.Values.cs index 634425caf..328835c3b 100644 --- a/Src/ILGPU/Backends/PTX/PTXCodeGenerator.Values.cs +++ b/Src/ILGPU/Backends/PTX/PTXCodeGenerator.Values.cs @@ -439,15 +439,12 @@ public void GenerateCode(Load load) var sourceType = load.Source.Type as PointerType; var targetRegister = Allocate(load); - // Query alignment information to emit vectorized instructions - int alignment = PointerAlignments.GetAlignment( - load.Source, - sourceType.ElementType.Alignment); EmitVectorizedCommand( + load.Source, + sourceType.ElementType.Alignment, PTXInstructions.LoadOperation, new LoadEmitter(sourceType, address), - targetRegister, - alignment); + targetRegister); } /// @@ -540,15 +537,12 @@ public void GenerateCode(Store store) var targetType = store.Target.Type as PointerType; var value = Load(store.Value); - // Query alignment information to emit vectorized instructions - int baseAlignment = PointerAlignments.GetAlignment( - store.Target, - targetType.ElementType.Alignment); EmitVectorizedCommand( + store.Target, + targetType.ElementType.Alignment, PTXInstructions.StoreOperation, new StoreEmitter(targetType, address), - value, - baseAlignment); + value); } /// diff --git a/Src/ILGPU/IR/Analyses/PointerAlignments.cs 
b/Src/ILGPU/IR/Analyses/PointerAlignments.cs index f8febccb1..645e8d583 100644 --- a/Src/ILGPU/IR/Analyses/PointerAlignments.cs +++ b/Src/ILGPU/IR/Analyses/PointerAlignments.cs @@ -84,14 +84,15 @@ public readonly int Merge(int first, int second) => { LoadFieldAddress lfa => AnalysisValue.Create( - context[lfa.Source].Data + - lfa.StructureType.GetOffset(lfa.FieldSpan.Access), - lfa.Type), + Math.Min( + context[lfa.Source].Data, + lfa.StructureType[lfa.FieldSpan.Access].Alignment), + lfa.Type), LoadElementAddress lea => AnalysisValue.Create( - Math.Min( + Math.Max( context[lea.Source].Data, - (lea.Type as IAddressSpaceType).ElementType.Size), + (lea.Type as IAddressSpaceType).ElementType.Alignment), lea.Type), _ => null, }; diff --git a/Src/ILGPU/IR/Types/StructureType.cs b/Src/ILGPU/IR/Types/StructureType.cs index 6d1c6588a..6571086dc 100644 --- a/Src/ILGPU/IR/Types/StructureType.cs +++ b/Src/ILGPU/IR/Types/StructureType.cs @@ -18,7 +18,6 @@ using System.Diagnostics.CodeAnalysis; using System.Reflection; using System.Runtime.CompilerServices; -using System.Security.Permissions; using System.Text; namespace ILGPU.IR.Types @@ -410,10 +409,16 @@ public readonly struct VectorizableFieldCollection /// public struct Entry { + #region Instance + /// /// Constructs a new entry. /// - internal Entry(TypeNode type, int index, int offset, int count = 1) + internal Entry( + TypeNode type, + int index, + int offset, + int count = 1) { Type = type; Index = index; @@ -421,8 +426,6 @@ internal Entry(TypeNode type, int index, int offset, int count = 1) Offset = offset; } - #region Instance - #endregion #region Properties @@ -440,17 +443,39 @@ internal Entry(TypeNode type, int index, int offset, int count = 1) /// /// Returns the number of fields. /// - public int Count { get; private set; } + public int Count { readonly get; private set; } /// /// Returns the base offset in bytes from the beginning of the field. 
/// public int Offset { get; } + /// + /// Returns the required alignment in bytes. + /// + public readonly int RequiredAlignment => Count * Type.Size; + #endregion #region Methods + /// + /// Splits the current entry into two parts (the first part is larger for odd counts). + /// + /// The first part. + /// The second part. + internal void Split(out Entry first, out Entry second) + { + Type.Assert(Count > 1); + int firstCount = (Count >> 1) + Count % 2; + first = new Entry(Type, Index, Offset, firstCount); + second = new Entry( + Type, + Index + firstCount, + Offset + Type.Size * firstCount, + Count - firstCount); + } + /// /// Adds a field to this entry. /// @@ -463,7 +488,22 @@ internal Entry(TypeNode type, int index, int offset, int count = 1) /// The underlying alignment in bytes. /// True, if the range is properly aligned. public readonly bool IsAligned(int alignment) => - (alignment + Offset) % (Type.Size * Count) == 0; + // Check for a proper alignment of the base address + alignment % RequiredAlignment == 0; + + /// + /// Returns true if this entry can be properly aligned. + /// + /// The parent structure type. + internal readonly bool CanBeAligned(StructureType parentType) + { + int requiredAlignment = RequiredAlignment; + return + // Check for a relative alignment inside the structure + Offset % requiredAlignment == 0 && + // Check for a relative alignment of odd structure accesses + (Offset + parentType.Size) % requiredAlignment == 0; + } #endregion } @@ -499,7 +539,7 @@ internal VectorizableFieldCollection(StructureType structureType) currentOffset + nextType.Size != nextOffset) { // Register the current vectorizable entry - RegisterRange(current); + RegisterRange(structureType, current); current = new Entry( nextType, i, @@ -514,14 +554,17 @@ internal VectorizableFieldCollection(StructureType structureType) } // Add the last entry - RegisterRange(current); + RegisterRange(structureType, current); } /// /// Registers the given range entry. /// + /// The parent structure type.
/// The entry to register. - private void RegisterRange(in Entry entry) + private void RegisterRange( + StructureType structureType, + in Entry entry) { int offset = entry.Offset; for ( @@ -531,12 +574,22 @@ private void RegisterRange(in Entry entry) { for (; index + stepSize <= entry.Count; index += stepSize) { - ranges.Add( - new Entry( - entry.Type, - index + entry.Index, - offset, - stepSize)); + var newEntry = new Entry( + entry.Type, + index + entry.Index, + offset, + stepSize); + if (newEntry.Count > 1 && !newEntry.CanBeAligned(structureType)) + { + newEntry.Split(out var first, out var second); + RegisterRange(structureType, first); + RegisterRange(structureType, second); + } + else + { + // The entry is properly aligned + ranges.Add(newEntry); + } offset += entry.Type.Size * stepSize; } }