2
2
// One example is tests/crashtest/images/imagetestsuite/b0b8914cc5f7a6eff409f16d8cc236c5.jpg
3
3
// That's why wrapping operators are needed.
4
4
use crate :: parser:: Dimensions ;
5
- use std:: num:: Wrapping ;
5
+ use std:: {
6
+ convert:: TryFrom ,
7
+ num:: Wrapping ,
8
+ } ;
6
9
7
10
pub ( crate ) fn choose_idct_size ( full_size : Dimensions , requested_size : Dimensions ) -> usize {
8
11
fn scaled ( len : u16 , scale : usize ) -> u16 { ( ( len as u32 * scale as u32 - 1 ) / 8 + 1 ) as u16 }
@@ -28,7 +31,7 @@ fn test_choose_idct_size() {
28
31
assert_eq ! ( choose_idct_size( Dimensions { width: 5472 , height: 3648 } , Dimensions { width: 685 , height: 999 } ) , 2 ) ;
29
32
assert_eq ! ( choose_idct_size( Dimensions { width: 5472 , height: 3648 } , Dimensions { width: 1000 , height: 1000 } ) , 2 ) ;
30
33
assert_eq ! ( choose_idct_size( Dimensions { width: 5472 , height: 3648 } , Dimensions { width: 1400 , height: 1400 } ) , 4 ) ;
31
-
34
+
32
35
assert_eq ! ( choose_idct_size( Dimensions { width: 5472 , height: 3648 } , Dimensions { width: 5472 , height: 3648 } ) , 8 ) ;
33
36
assert_eq ! ( choose_idct_size( Dimensions { width: 5472 , height: 3648 } , Dimensions { width: 16384 , height: 16384 } ) , 8 ) ;
34
37
assert_eq ! ( choose_idct_size( Dimensions { width: 1 , height: 1 } , Dimensions { width: 65535 , height: 65535 } ) , 8 ) ;
@@ -45,79 +48,74 @@ pub(crate) fn dequantize_and_idct_block(scale: usize, coefficients: &[i16], quan
45
48
}
46
49
}
47
50
48
- // This is based on stb_image's 'stbi__idct_block'.
49
- fn dequantize_and_idct_block_8x8 ( coefficients : & [ i16 ] , quantization_table : & [ u16 ; 64 ] , output_linestride : usize , output : & mut [ u8 ] ) {
51
+ pub fn dequantize_and_idct_block_8x8 (
52
+ coefficients : & [ i16 ] ,
53
+ quantization_table : & [ u16 ; 64 ] ,
54
+ output_linestride : usize ,
55
+ output : & mut [ u8 ]
56
+ ) {
50
57
debug_assert_eq ! ( coefficients. len( ) , 64 ) ;
58
+ let output = output
59
+ . chunks_mut ( output_linestride) ;
60
+ dequantize_and_idct_block_8x8_inner ( coefficients, quantization_table, output)
61
+ }
62
+
63
+ // This is based on stb_image's 'stbi__idct_block'.
64
+ fn dequantize_and_idct_block_8x8_inner < ' a , I > (
65
+ coefficients : & [ i16 ] ,
66
+ quantization_table : & [ u16 ; 64 ] ,
67
+ output : I ,
68
+ ) where
69
+ I : IntoIterator < Item = & ' a mut [ u8 ] > ,
70
+ I :: IntoIter : ExactSizeIterator < Item = & ' a mut [ u8 ] > ,
71
+ {
72
+ let output = output. into_iter ( ) ;
73
+ debug_assert ! (
74
+ output. len( ) >= 8 ,
75
+ "Output iterator has the wrong length: {}" ,
76
+ output. len( )
77
+ ) ;
51
78
52
- let mut temp = [ Wrapping ( 0i32 ) ; 64 ] ;
79
+ let mut temp = [ Wrapping ( 0 ) ; 64 ] ;
53
80
54
81
// columns
55
- for i in 0 .. 8 {
56
- // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
57
- if coefficients[ i + 8 ] == 0 && coefficients[ i + 16 ] == 0 && coefficients[ i + 24 ] == 0 &&
58
- coefficients[ i + 32 ] == 0 && coefficients[ i + 40 ] == 0 && coefficients[ i + 48 ] == 0 &&
59
- coefficients[ i + 56 ] == 0 {
60
- let dcterm = Wrapping ( coefficients[ i] as i32 * quantization_table[ i] as i32 ) << 2 ;
61
- temp[ i] = dcterm;
62
- temp[ i + 8 ] = dcterm;
82
+ for i in 0 ..8 {
83
+ if coefficients[ i + 8 ] == 0
84
+ && coefficients[ i + 16 ] == 0
85
+ && coefficients[ i + 24 ] == 0
86
+ && coefficients[ i + 32 ] == 0
87
+ && coefficients[ i + 40 ] == 0
88
+ && coefficients[ i + 48 ] == 0
89
+ && coefficients[ i + 56 ] == 0
90
+ {
91
+ let dcterm = dequantize ( coefficients[ i] , quantization_table[ i] ) << 2 ;
92
+ temp[ i] = dcterm;
93
+ temp[ i + 8 ] = dcterm;
63
94
temp[ i + 16 ] = dcterm;
64
95
temp[ i + 24 ] = dcterm;
65
96
temp[ i + 32 ] = dcterm;
66
97
temp[ i + 40 ] = dcterm;
67
98
temp[ i + 48 ] = dcterm;
68
99
temp[ i + 56 ] = dcterm;
69
- }
70
- else {
71
- let s0 = Wrapping ( coefficients[ i] as i32 * quantization_table[ i] as i32 ) ;
72
- let s1 = Wrapping ( coefficients[ i + 8 ] as i32 * quantization_table[ i + 8 ] as i32 ) ;
73
- let s2 = Wrapping ( coefficients[ i + 16 ] as i32 * quantization_table[ i + 16 ] as i32 ) ;
74
- let s3 = Wrapping ( coefficients[ i + 24 ] as i32 * quantization_table[ i + 24 ] as i32 ) ;
75
- let s4 = Wrapping ( coefficients[ i + 32 ] as i32 * quantization_table[ i + 32 ] as i32 ) ;
76
- let s5 = Wrapping ( coefficients[ i + 40 ] as i32 * quantization_table[ i + 40 ] as i32 ) ;
77
- let s6 = Wrapping ( coefficients[ i + 48 ] as i32 * quantization_table[ i + 48 ] as i32 ) ;
78
- let s7 = Wrapping ( coefficients[ i + 56 ] as i32 * quantization_table[ i + 56 ] as i32 ) ;
79
-
80
- let p2 = s2;
81
- let p3 = s6;
82
- let p1 = ( p2 + p3) * stbi_f2f ( 0.5411961 ) ;
83
- let t2 = p1 + p3 * stbi_f2f ( -1.847759065 ) ;
84
- let t3 = p1 + p2 * stbi_f2f ( 0.765366865 ) ;
85
- let p2 = s0;
86
- let p3 = s4;
87
- let t0 = stbi_fsh ( p2 + p3) ;
88
- let t1 = stbi_fsh ( p2 - p3) ;
89
- let x0 = t0 + t3;
90
- let x3 = t0 - t3;
91
- let x1 = t1 + t2;
92
- let x2 = t1 - t2;
93
- let t0 = s7;
94
- let t1 = s5;
95
- let t2 = s3;
96
- let t3 = s1;
97
- let p3 = t0 + t2;
98
- let p4 = t1 + t3;
99
- let p1 = t0 + t3;
100
- let p2 = t1 + t2;
101
- let p5 = ( p3 + p4) * stbi_f2f ( 1.175875602 ) ;
102
- let t0 = t0 * stbi_f2f ( 0.298631336 ) ;
103
- let t1 = t1 * stbi_f2f ( 2.053119869 ) ;
104
- let t2 = t2 * stbi_f2f ( 3.072711026 ) ;
105
- let t3 = t3 * stbi_f2f ( 1.501321110 ) ;
106
- let p1 = p5 + ( p1 * stbi_f2f ( -0.899976223 ) ) ;
107
- let p2 = p5 + ( p2 * stbi_f2f ( -2.562915447 ) ) ;
108
- let p3 = p3 * stbi_f2f ( -1.961570560 ) ;
109
- let p4 = p4 * stbi_f2f ( -0.390180644 ) ;
110
- let t3 = t3 + p1 + p4;
111
- let t2 = t2 + p2 + p3;
112
- let t1 = t1 + p2 + p4;
113
- let t0 = t0 + p1 + p3;
114
-
115
- // constants scaled things up by 1<<12; let's bring them back
116
- // down, but keep 2 extra bits of precision
117
- let x0 = x0 + Wrapping ( 512 ) ;
118
- let x1 = x1 + Wrapping ( 512 ) ;
119
- let x2 = x2 + Wrapping ( 512 ) ;
120
- let x3 = x3 + Wrapping ( 512 ) ;
100
+ } else {
101
+ let s0 = dequantize ( coefficients[ i] , quantization_table[ i] ) ;
102
+ let s1 = dequantize ( coefficients[ i + 8 ] , quantization_table[ i + 8 ] ) ;
103
+ let s2 = dequantize ( coefficients[ i + 16 ] , quantization_table[ i + 16 ] ) ;
104
+ let s3 = dequantize ( coefficients[ i + 24 ] , quantization_table[ i + 24 ] ) ;
105
+ let s4 = dequantize ( coefficients[ i + 32 ] , quantization_table[ i + 32 ] ) ;
106
+ let s5 = dequantize ( coefficients[ i + 40 ] , quantization_table[ i + 40 ] ) ;
107
+ let s6 = dequantize ( coefficients[ i + 48 ] , quantization_table[ i + 48 ] ) ;
108
+ let s7 = dequantize ( coefficients[ i + 56 ] , quantization_table[ i + 56 ] ) ;
109
+
110
+ let Kernel {
111
+ xs : [ x0, x1, x2, x3] ,
112
+ ts : [ t0, t1, t2, t3] ,
113
+ } = kernel (
114
+ [ s0, s1, s2, s3, s4, s5, s6, s7] ,
115
+ // constants scaled things up by 1<<12; let's bring them back
116
+ // down, but keep 2 extra bits of precision
117
+ 512 ,
118
+ ) ;
121
119
122
120
temp[ i] = ( x0 + t3) >> 10 ;
123
121
temp[ i + 56 ] = ( x0 - t3) >> 10 ;
@@ -130,72 +128,128 @@ fn dequantize_and_idct_block_8x8(coefficients: &[i16], quantization_table: &[u16
130
128
}
131
129
}
132
130
133
- for i in 0 .. 8 {
134
- // no fast case since the first 1D IDCT spread components out
135
- let s0 = temp[ i * 8 ] ;
136
- let s1 = temp[ i * 8 + 1 ] ;
137
- let s2 = temp[ i * 8 + 2 ] ;
138
- let s3 = temp[ i * 8 + 3 ] ;
139
- let s4 = temp[ i * 8 + 4 ] ;
140
- let s5 = temp[ i * 8 + 5 ] ;
141
- let s6 = temp[ i * 8 + 6 ] ;
142
- let s7 = temp[ i * 8 + 7 ] ;
143
-
144
- let p2 = s2;
145
- let p3 = s6;
146
- let p1 = ( p2 + p3) * stbi_f2f ( 0.5411961 ) ;
147
- let t2 = p1 + p3 * stbi_f2f ( -1.847759065 ) ;
148
- let t3 = p1 + p2 * stbi_f2f ( 0.765366865 ) ;
149
- let p2 = s0;
150
- let p3 = s4;
151
- let t0 = stbi_fsh ( p2 + p3) ;
152
- let t1 = stbi_fsh ( p2 - p3) ;
153
- let x0 = t0 + t3;
154
- let x3 = t0 - t3;
155
- let x1 = t1 + t2;
156
- let x2 = t1 - t2;
157
- let t0 = s7;
158
- let t1 = s5;
159
- let t2 = s3;
160
- let t3 = s1;
161
- let p3 = t0 + t2;
162
- let p4 = t1 + t3;
163
- let p1 = t0 + t3;
164
- let p2 = t1 + t2;
165
- let p5 = ( p3 + p4) * stbi_f2f ( 1.175875602 ) ;
166
- let t0 = t0 * stbi_f2f ( 0.298631336 ) ;
167
- let t1 = t1 * stbi_f2f ( 2.053119869 ) ;
168
- let t2 = t2 * stbi_f2f ( 3.072711026 ) ;
169
- let t3 = t3 * stbi_f2f ( 1.501321110 ) ;
170
- let p1 = p5 + p1 * stbi_f2f ( -0.899976223 ) ;
171
- let p2 = p5 + p2 * stbi_f2f ( -2.562915447 ) ;
172
- let p3 = p3 * stbi_f2f ( -1.961570560 ) ;
173
- let p4 = p4 * stbi_f2f ( -0.390180644 ) ;
174
- let t3 = t3 + p1 + p4;
175
- let t2 = t2 + p2 + p3;
176
- let t1 = t1 + p2 + p4;
177
- let t0 = t0 + p1 + p3;
131
+ for ( chunk, output_chunk) in temp. chunks_exact ( 8 ) . zip ( output) {
132
+ let chunk = <& [ _ ; 8 ] >:: try_from ( chunk) . unwrap ( ) ;
178
133
179
134
// constants scaled things up by 1<<12, plus we had 1<<2 from first
180
135
// loop, plus horizontal and vertical each scale by sqrt(8) so together
181
136
// we've got an extra 1<<3, so 1<<17 total we need to remove.
182
137
// so we want to round that, which means adding 0.5 * 1<<17,
183
138
// aka 65536. Also, we'll end up with -128 to 127 that we want
184
139
// to encode as 0..255 by adding 128, so we'll add that before the shift
185
- let x0 = x0 + Wrapping ( 65536 + ( 128 << 17 ) ) ;
186
- let x1 = x1 + Wrapping ( 65536 + ( 128 << 17 ) ) ;
187
- let x2 = x2 + Wrapping ( 65536 + ( 128 << 17 ) ) ;
188
- let x3 = x3 + Wrapping ( 65536 + ( 128 << 17 ) ) ;
189
-
190
- output[ i * output_linestride] = stbi_clamp ( ( x0 + t3) >> 17 ) ;
191
- output[ i * output_linestride + 7 ] = stbi_clamp ( ( x0 - t3) >> 17 ) ;
192
- output[ i * output_linestride + 1 ] = stbi_clamp ( ( x1 + t2) >> 17 ) ;
193
- output[ i * output_linestride + 6 ] = stbi_clamp ( ( x1 - t2) >> 17 ) ;
194
- output[ i * output_linestride + 2 ] = stbi_clamp ( ( x2 + t1) >> 17 ) ;
195
- output[ i * output_linestride + 5 ] = stbi_clamp ( ( x2 - t1) >> 17 ) ;
196
- output[ i * output_linestride + 3 ] = stbi_clamp ( ( x3 + t0) >> 17 ) ;
197
- output[ i * output_linestride + 4 ] = stbi_clamp ( ( x3 - t0) >> 17 ) ;
140
+ const X_SCALE : i32 = 65536 + ( 128 << 17 ) ;
141
+
142
+ // TODO When the minimum rust version supports it
143
+ // let [s0, rest @ ..] = chunk;
144
+ let ( s0, rest) = chunk. split_first ( ) . unwrap ( ) ;
145
+ if * rest == [ Wrapping ( 0 ) ; 7 ] {
146
+ let dcterm = stbi_clamp ( ( stbi_fsh ( * s0) + Wrapping ( X_SCALE ) ) >> 17 ) ;
147
+ output_chunk[ 0 ] = dcterm;
148
+ output_chunk[ 1 ] = dcterm;
149
+ output_chunk[ 2 ] = dcterm;
150
+ output_chunk[ 3 ] = dcterm;
151
+ output_chunk[ 4 ] = dcterm;
152
+ output_chunk[ 5 ] = dcterm;
153
+ output_chunk[ 6 ] = dcterm;
154
+ output_chunk[ 7 ] = dcterm;
155
+ } else {
156
+ let Kernel {
157
+ xs : [ x0, x1, x2, x3] ,
158
+ ts : [ t0, t1, t2, t3] ,
159
+ } = kernel ( * chunk, X_SCALE ) ;
160
+
161
+ output_chunk[ 0 ] = stbi_clamp ( ( x0 + t3) >> 17 ) ;
162
+ output_chunk[ 7 ] = stbi_clamp ( ( x0 - t3) >> 17 ) ;
163
+ output_chunk[ 1 ] = stbi_clamp ( ( x1 + t2) >> 17 ) ;
164
+ output_chunk[ 6 ] = stbi_clamp ( ( x1 - t2) >> 17 ) ;
165
+ output_chunk[ 2 ] = stbi_clamp ( ( x2 + t1) >> 17 ) ;
166
+ output_chunk[ 5 ] = stbi_clamp ( ( x2 - t1) >> 17 ) ;
167
+ output_chunk[ 3 ] = stbi_clamp ( ( x3 + t0) >> 17 ) ;
168
+ output_chunk[ 4 ] = stbi_clamp ( ( x3 - t0) >> 17 ) ;
169
+ }
170
+ }
171
+ }
172
+
173
/// Intermediate result of one 1-D IDCT pass over eight samples, as built by `kernel`.
///
/// Callers combine the halves as `xs[k] + ts[3 - k]` and `xs[k] - ts[3 - k]`.
struct Kernel {
    /// Terms computed from the even-indexed samples, with the caller's bias already added.
    xs: [Wrapping<i32>; 4],
    /// Terms computed from the odd-indexed samples.
    ts: [Wrapping<i32>; 4],
}
177
+
178
+ #[ inline]
179
+ fn kernel_x ( [ s0, s2, s4, s6] : [ Wrapping < i32 > ; 4 ] , x_scale : i32 ) -> [ Wrapping < i32 > ; 4 ] {
180
+ // Even `chunk` indicies
181
+ let ( t2, t3) ;
182
+ {
183
+ let p2 = s2;
184
+ let p3 = s6;
185
+
186
+ let p1 = ( p2 + p3) * stbi_f2f ( 0.5411961 ) ;
187
+ t2 = p1 + p3 * stbi_f2f ( -1.847759065 ) ;
188
+ t3 = p1 + p2 * stbi_f2f ( 0.765366865 ) ;
189
+ }
190
+
191
+ let ( t0, t1) ;
192
+ {
193
+ let p2 = s0;
194
+ let p3 = s4;
195
+
196
+ t0 = stbi_fsh ( p2 + p3) ;
197
+ t1 = stbi_fsh ( p2 - p3) ;
198
198
}
199
+
200
+ let x0 = t0 + t3;
201
+ let x3 = t0 - t3;
202
+ let x1 = t1 + t2;
203
+ let x2 = t1 - t2;
204
+
205
+ let x_scale = Wrapping ( x_scale) ;
206
+
207
+ [ x0 + x_scale, x1 + x_scale, x2 + x_scale, x3 + x_scale]
208
+ }
209
+
210
+ #[ inline]
211
+ fn kernel_t ( [ s1, s3, s5, s7] : [ Wrapping < i32 > ; 4 ] ) -> [ Wrapping < i32 > ; 4 ] {
212
+ // Odd `chunk` indicies
213
+ let mut t0 = s7;
214
+ let mut t1 = s5;
215
+ let mut t2 = s3;
216
+ let mut t3 = s1;
217
+
218
+ let p3 = t0 + t2;
219
+ let p4 = t1 + t3;
220
+ let p1 = t0 + t3;
221
+ let p2 = t1 + t2;
222
+ let p5 = ( p3 + p4) * stbi_f2f ( 1.175875602 ) ;
223
+
224
+ t0 *= stbi_f2f ( 0.298631336 ) ;
225
+ t1 *= stbi_f2f ( 2.053119869 ) ;
226
+ t2 *= stbi_f2f ( 3.072711026 ) ;
227
+ t3 *= stbi_f2f ( 1.501321110 ) ;
228
+
229
+ let p1 = p5 + p1 * stbi_f2f ( -0.899976223 ) ;
230
+ let p2 = p5 + p2 * stbi_f2f ( -2.562915447 ) ;
231
+ let p3 = p3 * stbi_f2f ( -1.961570560 ) ;
232
+ let p4 = p4 * stbi_f2f ( -0.390180644 ) ;
233
+
234
+ t3 += p1 + p4;
235
+ t2 += p2 + p3;
236
+ t1 += p2 + p4;
237
+ t0 += p1 + p3;
238
+
239
+ [ t0, t1, t2, t3]
240
+ }
241
+
242
+ #[ inline]
243
+ fn kernel ( [ s0, s1, s2, s3, s4, s5, s6, s7] : [ Wrapping < i32 > ; 8 ] , x_scale : i32 ) -> Kernel {
244
+ Kernel {
245
+ xs : kernel_x ( [ s0, s2, s4, s6] , x_scale) ,
246
+ ts : kernel_t ( [ s1, s3, s5, s7] ) ,
247
+ }
248
+ }
249
+
250
/// Dequantizes a single DCT coefficient.
///
/// Widens coefficient `c` and quantization-table entry `q` to `i32`, multiplies them, and
/// returns the product as a [`Wrapping`] value. The multiplication itself cannot overflow:
/// the extreme case `-32768 * 65535` still fits in an `i32`.
#[inline(always)]
fn dequantize(c: i16, q: u16) -> Wrapping<i32> {
    Wrapping(i32::from(c) * i32::from(q))
}
200
254
201
255
// 4x4 and 2x2 IDCT based on Rakesh Dugad and Narendra Ahuja: "A Fast Scheme for Image Size Change in the Compressed Domain" (2001).
0 commit comments