@@ -9,18 +9,6 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
9
9
{
10
10
internal static partial class FastFloatingPointDCT
11
11
{
12
- #pragma warning disable SA1310 , SA1311 , IDE1006 // naming rule violation warnings
13
- private static readonly Vector256 < float > mm256_F_0_7071 = Vector256 . Create ( 0.707106781f ) ;
14
- private static readonly Vector256 < float > mm256_F_0_3826 = Vector256 . Create ( 0.382683433f ) ;
15
- private static readonly Vector256 < float > mm256_F_0_5411 = Vector256 . Create ( 0.541196100f ) ;
16
- private static readonly Vector256 < float > mm256_F_1_3065 = Vector256 . Create ( 1.306562965f ) ;
17
-
18
- private static readonly Vector256 < float > mm256_F_1_4142 = Vector256 . Create ( 1.414213562f ) ;
19
- private static readonly Vector256 < float > mm256_F_1_8477 = Vector256 . Create ( 1.847759065f ) ;
20
- private static readonly Vector256 < float > mm256_F_n1_0823 = Vector256 . Create ( - 1.082392200f ) ;
21
- private static readonly Vector256 < float > mm256_F_n2_6131 = Vector256 . Create ( - 2.613125930f ) ;
22
- #pragma warning restore SA1310 , SA1311 , IDE1006
23
-
24
12
/// <summary>
25
13
/// Apply floating point FDCT in place using SIMD operations.
26
14
/// </summary>
@@ -57,6 +45,7 @@ static void FDCT8x8_1D_Avx(ref Block8x8F block)
57
45
block . V0 = Avx . Add ( tmp10 , tmp11 ) ;
58
46
block . V4 = Avx . Subtract ( tmp10 , tmp11 ) ;
59
47
48
+ Vector256 < float > mm256_F_0_7071 = Vector256 . Create ( 0.707106781f ) ;
60
49
Vector256 < float > z1 = Avx . Multiply ( Avx . Add ( tmp12 , tmp13 ) , mm256_F_0_7071 ) ;
61
50
block . V2 = Avx . Add ( tmp13 , z1 ) ;
62
51
block . V6 = Avx . Subtract ( tmp13 , z1 ) ;
@@ -66,9 +55,9 @@ static void FDCT8x8_1D_Avx(ref Block8x8F block)
66
55
tmp11 = Avx . Add ( tmp5 , tmp6 ) ;
67
56
tmp12 = Avx . Add ( tmp6 , tmp7 ) ;
68
57
69
- Vector256 < float > z5 = Avx . Multiply ( Avx . Subtract ( tmp10 , tmp12 ) , mm256_F_0_3826 ) ;
70
- Vector256 < float > z2 = SimdUtils . HwIntrinsics . MultiplyAdd ( z5 , mm256_F_0_5411 , tmp10 ) ;
71
- Vector256 < float > z4 = SimdUtils . HwIntrinsics . MultiplyAdd ( z5 , mm256_F_1_3065 , tmp12 ) ;
58
+ Vector256 < float > z5 = Avx . Multiply ( Avx . Subtract ( tmp10 , tmp12 ) , Vector256 . Create ( 0.382683433f ) ) ; // mm256_F_0_3826
59
+ Vector256 < float > z2 = SimdUtils . HwIntrinsics . MultiplyAdd ( z5 , Vector256 . Create ( 0.541196100f ) , tmp10 ) ; // mm256_F_0_5411
60
+ Vector256 < float > z4 = SimdUtils . HwIntrinsics . MultiplyAdd ( z5 , Vector256 . Create ( 1.306562965f ) , tmp12 ) ; // mm256_F_1_3065
72
61
Vector256 < float > z3 = Avx . Multiply ( tmp11 , mm256_F_0_7071 ) ;
73
62
74
63
Vector256 < float > z11 = Avx . Add ( tmp7 , z3 ) ;
@@ -109,6 +98,7 @@ static void IDCT8x8_1D_Avx(ref Block8x8F block)
109
98
Vector256 < float > tmp10 = Avx . Add ( z5 , tmp2 ) ;
110
99
Vector256 < float > tmp11 = Avx . Subtract ( z5 , tmp2 ) ;
111
100
101
+ Vector256 < float > mm256_F_1_4142 = Vector256 . Create ( 1.414213562f ) ;
112
102
Vector256 < float > tmp13 = Avx . Add ( tmp1 , tmp3 ) ;
113
103
Vector256 < float > tmp12 = SimdUtils . HwIntrinsics . MultiplySubstract ( tmp13 , Avx . Subtract ( tmp1 , tmp3 ) , mm256_F_1_4142 ) ;
114
104
@@ -131,10 +121,10 @@ static void IDCT8x8_1D_Avx(ref Block8x8F block)
131
121
tmp7 = Avx . Add ( z11 , z13 ) ;
132
122
tmp11 = Avx . Multiply ( Avx . Subtract ( z11 , z13 ) , mm256_F_1_4142 ) ;
133
123
134
- z5 = Avx . Multiply ( Avx . Add ( z10 , z12 ) , mm256_F_1_8477 ) ;
124
+ z5 = Avx . Multiply ( Avx . Add ( z10 , z12 ) , Vector256 . Create ( 1.847759065f ) ) ; // mm256_F_1_8477
135
125
136
- tmp10 = SimdUtils . HwIntrinsics . MultiplyAdd ( z5 , z12 , mm256_F_n1_0823 ) ;
137
- tmp12 = SimdUtils . HwIntrinsics . MultiplyAdd ( z5 , z10 , mm256_F_n2_6131 ) ;
126
+ tmp10 = SimdUtils . HwIntrinsics . MultiplyAdd ( z5 , z12 , Vector256 . Create ( - 1.082392200f ) ) ; // mm256_F_n1_0823
127
+ tmp12 = SimdUtils . HwIntrinsics . MultiplyAdd ( z5 , z10 , Vector256 . Create ( - 2.613125930f ) ) ; // mm256_F_n2_6131
138
128
139
129
tmp6 = Avx . Subtract ( tmp12 , tmp7 ) ;
140
130
tmp5 = Avx . Subtract ( tmp11 , tmp6 ) ;
0 commit comments