Skip to content

Commit

Permalink
Merge pull request #2122 from gfoidl/inline-vector-constants
Browse files Browse the repository at this point in the history
Used inline SIMD vectors if they are constants
  • Loading branch information
JimBobSquarePants authored Jun 14, 2022
2 parents c934e2f + 83e28b0 commit 75dc96a
Show file tree
Hide file tree
Showing 11 changed files with 265 additions and 391 deletions.
5 changes: 2 additions & 3 deletions src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Intrinsic.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@ internal partial struct Block8x8F
[FieldOffset(224)]
public Vector256<float> V7;

private static readonly Vector256<int> MultiplyIntoInt16ShuffleMask = Vector256.Create(0, 1, 4, 5, 2, 3, 6, 7);

private static unsafe void MultiplyIntoInt16_Avx2(ref Block8x8F a, ref Block8x8F b, ref Block8x8 dest)
{
DebugGuard.IsTrue(Avx2.IsSupported, "Avx2 support is required to run this operation!");
Expand All @@ -45,14 +43,15 @@ private static unsafe void MultiplyIntoInt16_Avx2(ref Block8x8F a, ref Block8x8F
ref Vector256<float> bBase = ref b.V0;

ref Vector256<short> destRef = ref dest.V01;
Vector256<int> multiplyIntoInt16ShuffleMask = Vector256.Create(0, 1, 4, 5, 2, 3, 6, 7);

for (nint i = 0; i < 8; i += 2)
{
Vector256<int> row0 = Avx.ConvertToVector256Int32(Avx.Multiply(Unsafe.Add(ref aBase, i + 0), Unsafe.Add(ref bBase, i + 0)));
Vector256<int> row1 = Avx.ConvertToVector256Int32(Avx.Multiply(Unsafe.Add(ref aBase, i + 1), Unsafe.Add(ref bBase, i + 1)));

Vector256<short> row = Avx2.PackSignedSaturate(row0, row1);
row = Avx2.PermuteVar8x32(row.AsInt32(), MultiplyIntoInt16ShuffleMask).AsInt16();
row = Avx2.PermuteVar8x32(row.AsInt32(), multiplyIntoInt16ShuffleMask).AsInt16();

Unsafe.Add(ref destRef, (IntPtr)((uint)i / 2)) = row;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
internal static partial class FastFloatingPointDCT
{
#pragma warning disable SA1310, SA1311, IDE1006 // naming rule violation warnings
private static readonly Vector256<float> mm256_F_0_7071 = Vector256.Create(0.707106781f);
private static readonly Vector256<float> mm256_F_0_3826 = Vector256.Create(0.382683433f);
private static readonly Vector256<float> mm256_F_0_5411 = Vector256.Create(0.541196100f);
private static readonly Vector256<float> mm256_F_1_3065 = Vector256.Create(1.306562965f);

private static readonly Vector256<float> mm256_F_1_4142 = Vector256.Create(1.414213562f);
private static readonly Vector256<float> mm256_F_1_8477 = Vector256.Create(1.847759065f);
private static readonly Vector256<float> mm256_F_n1_0823 = Vector256.Create(-1.082392200f);
private static readonly Vector256<float> mm256_F_n2_6131 = Vector256.Create(-2.613125930f);
#pragma warning restore SA1310, SA1311, IDE1006

/// <summary>
/// Apply floating point FDCT inplace using simd operations.
/// </summary>
Expand Down Expand Up @@ -57,6 +45,7 @@ static void FDCT8x8_1D_Avx(ref Block8x8F block)
block.V0 = Avx.Add(tmp10, tmp11);
block.V4 = Avx.Subtract(tmp10, tmp11);

Vector256<float> mm256_F_0_7071 = Vector256.Create(0.707106781f);
Vector256<float> z1 = Avx.Multiply(Avx.Add(tmp12, tmp13), mm256_F_0_7071);
block.V2 = Avx.Add(tmp13, z1);
block.V6 = Avx.Subtract(tmp13, z1);
Expand All @@ -66,9 +55,9 @@ static void FDCT8x8_1D_Avx(ref Block8x8F block)
tmp11 = Avx.Add(tmp5, tmp6);
tmp12 = Avx.Add(tmp6, tmp7);

Vector256<float> z5 = Avx.Multiply(Avx.Subtract(tmp10, tmp12), mm256_F_0_3826);
Vector256<float> z2 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, mm256_F_0_5411, tmp10);
Vector256<float> z4 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, mm256_F_1_3065, tmp12);
Vector256<float> z5 = Avx.Multiply(Avx.Subtract(tmp10, tmp12), Vector256.Create(0.382683433f)); // mm256_F_0_3826
Vector256<float> z2 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, Vector256.Create(0.541196100f), tmp10); // mm256_F_0_5411
Vector256<float> z4 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, Vector256.Create(1.306562965f), tmp12); // mm256_F_1_3065
Vector256<float> z3 = Avx.Multiply(tmp11, mm256_F_0_7071);

Vector256<float> z11 = Avx.Add(tmp7, z3);
Expand Down Expand Up @@ -109,6 +98,7 @@ static void IDCT8x8_1D_Avx(ref Block8x8F block)
Vector256<float> tmp10 = Avx.Add(z5, tmp2);
Vector256<float> tmp11 = Avx.Subtract(z5, tmp2);

Vector256<float> mm256_F_1_4142 = Vector256.Create(1.414213562f);
Vector256<float> tmp13 = Avx.Add(tmp1, tmp3);
Vector256<float> tmp12 = SimdUtils.HwIntrinsics.MultiplySubstract(tmp13, Avx.Subtract(tmp1, tmp3), mm256_F_1_4142);

Expand All @@ -131,10 +121,10 @@ static void IDCT8x8_1D_Avx(ref Block8x8F block)
tmp7 = Avx.Add(z11, z13);
tmp11 = Avx.Multiply(Avx.Subtract(z11, z13), mm256_F_1_4142);

z5 = Avx.Multiply(Avx.Add(z10, z12), mm256_F_1_8477);
z5 = Avx.Multiply(Avx.Add(z10, z12), Vector256.Create(1.847759065f)); // mm256_F_1_8477

tmp10 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, z12, mm256_F_n1_0823);
tmp12 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, z10, mm256_F_n2_6131);
tmp10 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, z12, Vector256.Create(-1.082392200f)); // mm256_F_n1_0823
tmp12 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, z10, Vector256.Create(-2.613125930f)); // mm256_F_n2_6131

tmp6 = Avx.Subtract(tmp12, tmp7);
tmp5 = Avx.Subtract(tmp11, tmp6);
Expand Down
Loading

0 comments on commit 75dc96a

Please sign in to comment.