Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions src/ImageSharp/Formats/Jpeg/Components/Block8x8F.Intrinsic.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@ internal partial struct Block8x8F
[FieldOffset(224)]
public Vector256<float> V7;

private static readonly Vector256<int> MultiplyIntoInt16ShuffleMask = Vector256.Create(0, 1, 4, 5, 2, 3, 6, 7);

private static unsafe void MultiplyIntoInt16_Avx2(ref Block8x8F a, ref Block8x8F b, ref Block8x8 dest)
{
DebugGuard.IsTrue(Avx2.IsSupported, "Avx2 support is required to run this operation!");
Expand All @@ -45,14 +43,15 @@ private static unsafe void MultiplyIntoInt16_Avx2(ref Block8x8F a, ref Block8x8F
ref Vector256<float> bBase = ref b.V0;

ref Vector256<short> destRef = ref dest.V01;
Vector256<int> multiplyIntoInt16ShuffleMask = Vector256.Create(0, 1, 4, 5, 2, 3, 6, 7);

for (nint i = 0; i < 8; i += 2)
{
Vector256<int> row0 = Avx.ConvertToVector256Int32(Avx.Multiply(Unsafe.Add(ref aBase, i + 0), Unsafe.Add(ref bBase, i + 0)));
Vector256<int> row1 = Avx.ConvertToVector256Int32(Avx.Multiply(Unsafe.Add(ref aBase, i + 1), Unsafe.Add(ref bBase, i + 1)));

Vector256<short> row = Avx2.PackSignedSaturate(row0, row1);
row = Avx2.PermuteVar8x32(row.AsInt32(), MultiplyIntoInt16ShuffleMask).AsInt16();
row = Avx2.PermuteVar8x32(row.AsInt32(), multiplyIntoInt16ShuffleMask).AsInt16();

Unsafe.Add(ref destRef, (IntPtr)((uint)i / 2)) = row;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
{
internal static partial class FastFloatingPointDCT
{
#pragma warning disable SA1310, SA1311, IDE1006 // naming rule violation warnings
private static readonly Vector256<float> mm256_F_0_7071 = Vector256.Create(0.707106781f);
private static readonly Vector256<float> mm256_F_0_3826 = Vector256.Create(0.382683433f);
private static readonly Vector256<float> mm256_F_0_5411 = Vector256.Create(0.541196100f);
private static readonly Vector256<float> mm256_F_1_3065 = Vector256.Create(1.306562965f);

private static readonly Vector256<float> mm256_F_1_4142 = Vector256.Create(1.414213562f);
private static readonly Vector256<float> mm256_F_1_8477 = Vector256.Create(1.847759065f);
private static readonly Vector256<float> mm256_F_n1_0823 = Vector256.Create(-1.082392200f);
private static readonly Vector256<float> mm256_F_n2_6131 = Vector256.Create(-2.613125930f);
#pragma warning restore SA1310, SA1311, IDE1006

/// <summary>
/// Apply floating point FDCT inplace using simd operations.
/// </summary>
Expand Down Expand Up @@ -57,6 +45,7 @@ static void FDCT8x8_1D_Avx(ref Block8x8F block)
block.V0 = Avx.Add(tmp10, tmp11);
block.V4 = Avx.Subtract(tmp10, tmp11);

Vector256<float> mm256_F_0_7071 = Vector256.Create(0.707106781f);
Vector256<float> z1 = Avx.Multiply(Avx.Add(tmp12, tmp13), mm256_F_0_7071);
block.V2 = Avx.Add(tmp13, z1);
block.V6 = Avx.Subtract(tmp13, z1);
Expand All @@ -66,9 +55,9 @@ static void FDCT8x8_1D_Avx(ref Block8x8F block)
tmp11 = Avx.Add(tmp5, tmp6);
tmp12 = Avx.Add(tmp6, tmp7);

Vector256<float> z5 = Avx.Multiply(Avx.Subtract(tmp10, tmp12), mm256_F_0_3826);
Vector256<float> z2 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, mm256_F_0_5411, tmp10);
Vector256<float> z4 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, mm256_F_1_3065, tmp12);
Vector256<float> z5 = Avx.Multiply(Avx.Subtract(tmp10, tmp12), Vector256.Create(0.382683433f)); // mm256_F_0_3826
Vector256<float> z2 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, Vector256.Create(0.541196100f), tmp10); // mm256_F_0_5411
Vector256<float> z4 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, Vector256.Create(1.306562965f), tmp12); // mm256_F_1_3065
Vector256<float> z3 = Avx.Multiply(tmp11, mm256_F_0_7071);

Vector256<float> z11 = Avx.Add(tmp7, z3);
Expand Down Expand Up @@ -109,6 +98,7 @@ static void IDCT8x8_1D_Avx(ref Block8x8F block)
Vector256<float> tmp10 = Avx.Add(z5, tmp2);
Vector256<float> tmp11 = Avx.Subtract(z5, tmp2);

Vector256<float> mm256_F_1_4142 = Vector256.Create(1.414213562f);
Vector256<float> tmp13 = Avx.Add(tmp1, tmp3);
Vector256<float> tmp12 = SimdUtils.HwIntrinsics.MultiplySubstract(tmp13, Avx.Subtract(tmp1, tmp3), mm256_F_1_4142);

Expand All @@ -131,10 +121,10 @@ static void IDCT8x8_1D_Avx(ref Block8x8F block)
tmp7 = Avx.Add(z11, z13);
tmp11 = Avx.Multiply(Avx.Subtract(z11, z13), mm256_F_1_4142);

z5 = Avx.Multiply(Avx.Add(z10, z12), mm256_F_1_8477);
z5 = Avx.Multiply(Avx.Add(z10, z12), Vector256.Create(1.847759065f)); // mm256_F_1_8477

tmp10 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, z12, mm256_F_n1_0823);
tmp12 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, z10, mm256_F_n2_6131);
tmp10 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, z12, Vector256.Create(-1.082392200f)); // mm256_F_n1_0823
tmp12 = SimdUtils.HwIntrinsics.MultiplyAdd(z5, z10, Vector256.Create(-2.613125930f)); // mm256_F_n2_6131

tmp6 = Avx.Subtract(tmp12, tmp7);
tmp5 = Avx.Subtract(tmp11, tmp6);
Expand Down
Loading