Merge pull request #2122 from gfoidl/inline-vector-constants

Use inline SIMD vectors where they are constants
SixLabors · Jun 14, 2022 · 75dc96a
2 parents c934e2f + 83e28b0
commit 75dc96a
Show file tree

Hide file tree

Showing 11 changed files with 265 additions and 391 deletions.
diff --git a/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Intrinsic.cs b/src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Intrinsic.cs
@@ -35,8 +35,6 @@ internal partial struct Block8x8F
         [FieldOffset(224)]
         public Vector256<float> V7;
 
-        private static readonly Vector256<int> MultiplyIntoInt16ShuffleMask = Vector256.Create(0, 1, 4, 5, 2, 3, 6, 7);
-
         private static unsafe void MultiplyIntoInt16_Avx2(ref Block8x8F a, ref Block8x8F b, ref Block8x8 dest)
         {
             DebugGuard.IsTrue(Avx2.IsSupported, "Avx2 support is required to run this operation!");
@@ -45,14 +43,15 @@ private static unsafe void MultiplyIntoInt16_Avx2(ref Block8x8F a, ref Block8x8F
             ref Vector256<float> bBase = ref b.V0;
 
             ref Vector256<short> destRef = ref dest.V01;
+            Vector256<int> multiplyIntoInt16ShuffleMask = Vector256.Create(0, 1, 4, 5, 2, 3, 6, 7);
 
             for (nint i = 0; i < 8; i += 2)
             {
                 Vector256<int> row0 = Avx.ConvertToVector256Int32(Avx.Multiply(Unsafe.Add(ref aBase, i + 0), Unsafe.Add(ref bBase, i + 0)));
                 Vector256<int> row1 = Avx.ConvertToVector256Int32(Avx.Multiply(Unsafe.Add(ref aBase, i + 1), Unsafe.Add(ref bBase, i + 1)));
 
                 Vector256<short> row = Avx2.PackSignedSaturate(row0, row1);
-                row = Avx2.PermuteVar8x32(row.AsInt32(), MultiplyIntoInt16ShuffleMask).AsInt16();
+                row = Avx2.PermuteVar8x32(row.AsInt32(), multiplyIntoInt16ShuffleMask).AsInt16();
 
                 Unsafe.Add(ref destRef, (IntPtr)((uint)i / 2)) = row;
             }

diff --git a/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.Intrinsic.cs b/src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.Intrinsic.cs
@@ -9,18 +9,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
 {
     internal static partial class FastFloatingPointDCT
     {
-#pragma warning disable SA1310, SA1311, IDE1006 // naming rule violation warnings
-        private static readonly Vector256<float> mm256_F_0_7071 = Vector256.Create(0.707106781f);
-        private static readonly Vector256<float> mm256_F_0_3826 = Vector256.Create(0.382683433f);
-        private static readonly Vector256<float> mm256_F_0_5411 = Vector256.Create(0.541196100f);
-        private static readonly Vector256<float> mm256_F_1_3065 = Vector256.Create(1.306562965f);
-
-        private static readonly Vector256<float> mm256_F_1_4142 = Vector256.Create(1.414213562f);
-        private static readonly Vector256<float> mm256_F_1_8477 = Vector256.Create(1.847759065f);
-        private static readonly Vector256<float> mm256_F_n1_0823 = Vector256.Create(-1.082392200f);
-        private static readonly Vector256<float> mm256_F_n2_6131 = Vector256.Create(-2.613125930f);
-#pragma warning restore SA1310, SA1311, IDE1006
-
         /// <summary>
         /// Apply floating point FDCT inplace using simd operations.
         /// </summary>
@@ -57,6 +45,7 @@ static void FDCT8x8_1D_Avx(ref Block8x8F block)
                 block.V0 = Avx.Add(tmp10, tmp11);
                 block.V4 = Avx.Subtract(tmp10, tmp11);
 
+                Vector256<float> mm256_F_0_7071 = Vector256.Create(0.707106781f);
                 Vector256<float> z1 = Avx.Multiply(Avx.Add(tmp12, tmp13), mm256_F_0_7071);
                 block.V2 = Avx.Add(tmp13, z1);
                 block.V6 = Avx.Subtract(tmp13, z1);
@@ -66,9 +55,9 @@ static void FDCT8x8_1D_Avx(ref Block8x8F block)
                 tmp11 = Avx.Add(tmp5, tmp6);
                 tmp12 = Avx.Add(tmp6, tmp7);
 
-                Vector256<float> z5 = Avx.Multiply(Avx.Subtract(tmp10, tmp12), mm256_F_0_3826);
-                Vector256<float> z2 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, mm256_F_0_5411, tmp10);
-                Vector256<float> z4 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, mm256_F_1_3065, tmp12);
+                Vector256<float> z5 = Avx.Multiply(Avx.Subtract(tmp10, tmp12), Vector256.Create(0.382683433f));         // mm256_F_0_3826
+                Vector256<float> z2 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, Vector256.Create(0.541196100f), tmp10);    // mm256_F_0_5411
+                Vector256<float> z4 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, Vector256.Create(1.306562965f), tmp12);    // mm256_F_1_3065
                 Vector256<float> z3 = Avx.Multiply(tmp11, mm256_F_0_7071);
 
                 Vector256<float> z11 = Avx.Add(tmp7, z3);
@@ -109,6 +98,7 @@ static void IDCT8x8_1D_Avx(ref Block8x8F block)
                 Vector256<float> tmp10 = Avx.Add(z5, tmp2);
                 Vector256<float> tmp11 = Avx.Subtract(z5, tmp2);
 
+                Vector256<float> mm256_F_1_4142 = Vector256.Create(1.414213562f);
                 Vector256<float> tmp13 = Avx.Add(tmp1, tmp3);
                 Vector256<float> tmp12 = SimdUtils.HwIntrinsics.MultiplySubstract(tmp13, Avx.Subtract(tmp1, tmp3), mm256_F_1_4142);
 
@@ -131,10 +121,10 @@ static void IDCT8x8_1D_Avx(ref Block8x8F block)
                 tmp7 = Avx.Add(z11, z13);
                 tmp11 = Avx.Multiply(Avx.Subtract(z11, z13), mm256_F_1_4142);
 
-                z5 = Avx.Multiply(Avx.Add(z10, z12), mm256_F_1_8477);
+                z5 = Avx.Multiply(Avx.Add(z10, z12), Vector256.Create(1.847759065f));                   // mm256_F_1_8477
 
-                tmp10 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, z12, mm256_F_n1_0823);
-                tmp12 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, z10, mm256_F_n2_6131);
+                tmp10 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, z12, Vector256.Create(-1.082392200f));   // mm256_F_n1_0823
+                tmp12 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, z10, Vector256.Create(-2.613125930f));   // mm256_F_n2_6131
 
                 tmp6 = Avx.Subtract(tmp12, tmp7);
                 tmp5 = Avx.Subtract(tmp11, tmp6);