Skip to content

Commit 75dc96a

Browse files
Merge pull request #2122 from gfoidl/inline-vector-constants
Used inline SIMD vectors if they are constants
2 parents c934e2f + 83e28b0 commit 75dc96a

11 files changed

+265
-391
lines changed

src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Intrinsic.cs

+2-3
Original file line number | Diff line number | Diff line change
@@ -35,8 +35,6 @@ internal partial struct Block8x8F
3535
[FieldOffset(224)]
3636
public Vector256<float> V7;
3737

38-
private static readonly Vector256<int> MultiplyIntoInt16ShuffleMask = Vector256.Create(0, 1, 4, 5, 2, 3, 6, 7);
39-
4038
private static unsafe void MultiplyIntoInt16_Avx2(ref Block8x8F a, ref Block8x8F b, ref Block8x8 dest)
4139
{
4240
DebugGuard.IsTrue(Avx2.IsSupported, "Avx2 support is required to run this operation!");
@@ -45,14 +43,15 @@ private static unsafe void MultiplyIntoInt16_Avx2(ref Block8x8F a, ref Block8x8F
4543
ref Vector256<float> bBase = ref b.V0;
4644

4745
ref Vector256<short> destRef = ref dest.V01;
46+
Vector256<int> multiplyIntoInt16ShuffleMask = Vector256.Create(0, 1, 4, 5, 2, 3, 6, 7);
4847

4948
for (nint i = 0; i < 8; i += 2)
5049
{
5150
Vector256<int> row0 = Avx.ConvertToVector256Int32(Avx.Multiply(Unsafe.Add(ref aBase, i + 0), Unsafe.Add(ref bBase, i + 0)));
5251
Vector256<int> row1 = Avx.ConvertToVector256Int32(Avx.Multiply(Unsafe.Add(ref aBase, i + 1), Unsafe.Add(ref bBase, i + 1)));
5352

5453
Vector256<short> row = Avx2.PackSignedSaturate(row0, row1);
55-
row = Avx2.PermuteVar8x32(row.AsInt32(), MultiplyIntoInt16ShuffleMask).AsInt16();
54+
row = Avx2.PermuteVar8x32(row.AsInt32(), multiplyIntoInt16ShuffleMask).AsInt16();
5655

5756
Unsafe.Add(ref destRef, (IntPtr)((uint)i / 2)) = row;
5857
}

src/ImageSharp/Formats/Jpeg/Components/FastFloatingPointDCT.Intrinsic.cs

+8-18
Original file line number | Diff line number | Diff line change
@@ -9,18 +9,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
99
{
1010
internal static partial class FastFloatingPointDCT
1111
{
12-
#pragma warning disable SA1310, SA1311, IDE1006 // naming rule violation warnings
13-
private static readonly Vector256<float> mm256_F_0_7071 = Vector256.Create(0.707106781f);
14-
private static readonly Vector256<float> mm256_F_0_3826 = Vector256.Create(0.382683433f);
15-
private static readonly Vector256<float> mm256_F_0_5411 = Vector256.Create(0.541196100f);
16-
private static readonly Vector256<float> mm256_F_1_3065 = Vector256.Create(1.306562965f);
17-
18-
private static readonly Vector256<float> mm256_F_1_4142 = Vector256.Create(1.414213562f);
19-
private static readonly Vector256<float> mm256_F_1_8477 = Vector256.Create(1.847759065f);
20-
private static readonly Vector256<float> mm256_F_n1_0823 = Vector256.Create(-1.082392200f);
21-
private static readonly Vector256<float> mm256_F_n2_6131 = Vector256.Create(-2.613125930f);
22-
#pragma warning restore SA1310, SA1311, IDE1006
23-
2412
/// <summary>
2513
/// Apply floating point FDCT inplace using simd operations.
2614
/// </summary>
@@ -57,6 +45,7 @@ static void FDCT8x8_1D_Avx(ref Block8x8F block)
5745
block.V0 = Avx.Add(tmp10, tmp11);
5846
block.V4 = Avx.Subtract(tmp10, tmp11);
5947

48+
Vector256<float> mm256_F_0_7071 = Vector256.Create(0.707106781f);
6049
Vector256<float> z1 = Avx.Multiply(Avx.Add(tmp12, tmp13), mm256_F_0_7071);
6150
block.V2 = Avx.Add(tmp13, z1);
6251
block.V6 = Avx.Subtract(tmp13, z1);
@@ -66,9 +55,9 @@ static void FDCT8x8_1D_Avx(ref Block8x8F block)
6655
tmp11 = Avx.Add(tmp5, tmp6);
6756
tmp12 = Avx.Add(tmp6, tmp7);
6857

69-
Vector256<float> z5 = Avx.Multiply(Avx.Subtract(tmp10, tmp12), mm256_F_0_3826);
70-
Vector256<float> z2 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, mm256_F_0_5411, tmp10);
71-
Vector256<float> z4 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, mm256_F_1_3065, tmp12);
58+
Vector256<float> z5 = Avx.Multiply(Avx.Subtract(tmp10, tmp12), Vector256.Create(0.382683433f)); // mm256_F_0_3826
59+
Vector256<float> z2 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, Vector256.Create(0.541196100f), tmp10); // mm256_F_0_5411
60+
Vector256<float> z4 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, Vector256.Create(1.306562965f), tmp12); // mm256_F_1_3065
7261
Vector256<float> z3 = Avx.Multiply(tmp11, mm256_F_0_7071);
7362

7463
Vector256<float> z11 = Avx.Add(tmp7, z3);
@@ -109,6 +98,7 @@ static void IDCT8x8_1D_Avx(ref Block8x8F block)
10998
Vector256<float> tmp10 = Avx.Add(z5, tmp2);
11099
Vector256<float> tmp11 = Avx.Subtract(z5, tmp2);
111100

101+
Vector256<float> mm256_F_1_4142 = Vector256.Create(1.414213562f);
112102
Vector256<float> tmp13 = Avx.Add(tmp1, tmp3);
113103
Vector256<float> tmp12 = SimdUtils.HwIntrinsics.MultiplySubstract(tmp13, Avx.Subtract(tmp1, tmp3), mm256_F_1_4142);
114104

@@ -131,10 +121,10 @@ static void IDCT8x8_1D_Avx(ref Block8x8F block)
131121
tmp7 = Avx.Add(z11, z13);
132122
tmp11 = Avx.Multiply(Avx.Subtract(z11, z13), mm256_F_1_4142);
133123

134-
z5 = Avx.Multiply(Avx.Add(z10, z12), mm256_F_1_8477);
124+
z5 = Avx.Multiply(Avx.Add(z10, z12), Vector256.Create(1.847759065f)); // mm256_F_1_8477
135125

136-
tmp10 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, z12, mm256_F_n1_0823);
137-
tmp12 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, z10, mm256_F_n2_6131);
126+
tmp10 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, z12, Vector256.Create(-1.082392200f)); // mm256_F_n1_0823
127+
tmp12 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, z10, Vector256.Create(-2.613125930f)); // mm256_F_n2_6131
138128

139129
tmp6 = Avx.Subtract(tmp12, tmp7);
140130
tmp5 = Avx.Subtract(tmp11, tmp6);

0 commit comments

Comments
 (0)