1
+ /* blast.c
2
+ * Copyright (C) 2003, 2012, 2013 Mark Adler
3
+ * For conditions of distribution and use, see copyright notice in blast.h
4
+ * version 1.3, 24 Aug 2013
5
+ *
6
+ * blast.c decompresses data compressed by the PKWare Compression Library.
7
+ * This function provides functionality similar to the explode() function of
8
+ * the PKWare library, hence the name "blast".
9
+ *
10
+ * This decompressor is based on the excellent format description provided by
11
+ * Ben Rudiak-Gould in comp.compression on August 13, 2001. Interestingly, the
12
+ * example Ben provided in the post is incorrect. The distance 110001 should
13
+ * instead be 111000. When corrected, the example byte stream becomes:
14
+ *
15
+ * 00 04 82 24 25 8f 80 7f
16
+ *
17
+ * which decompresses to "AIAIAIAIAIAIA" (without the quotes).
18
+ */
19
+
20
+ /*
21
+ * Change history:
22
+ *
23
+ * 1.0 12 Feb 2003 - First version
24
+ * 1.1 16 Feb 2003 - Fixed distance check for > 4 GB uncompressed data
25
+ * 1.2 24 Oct 2012 - Add note about using binary mode in stdio
26
+ * - Fix comparisons of differently signed integers
27
+ * 1.3 24 Aug 2013 - Return unused input from blast()
28
+ * - Fix test code to correctly report unused input
29
+ * - Enable the provision of initial input to blast()
30
+ */
31
+
32
+ using System ;
33
+ using System . Collections . Generic ;
34
+ using static SabreTools . Compression . Blast . Constants ;
35
+
36
+ namespace SabreTools . Compression . Blast
37
+ {
38
public unsafe static class BlastDecoder
{
    #region Huffman Encoding

    /// <summary>
    /// Huffman table used when literals are coded (see <see cref="Decomp"/>).
    /// </summary>
    private static readonly Huffman literalCode = new(MAXBITS + 1, 256);

    /// <summary>
    /// Huffman table used to decode the length symbol of a length/distance pair.
    /// </summary>
    private static readonly Huffman lengthCode = new(MAXBITS + 1, 16);

    /// <summary>
    /// Huffman table used to decode the upper part of a copy distance.
    /// </summary>
    private static readonly Huffman distanceCode = new(MAXBITS + 1, 64);

    /// <summary>
    /// Base copy length for each length symbol; the extra bits read from the
    /// stream are added to this value.
    /// </summary>
    private static readonly short[] lengthBase =
    [
        3, 2, 4, 5, 6, 7, 8, 9, 10, 12, 16, 24, 40, 72, 136, 264
    ];

    /// <summary>
    /// Number of extra bits to read for each length symbol.
    /// </summary>
    private static readonly byte[] lengthExtraBits =
    [
        0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8
    ];

    #endregion

    /// <summary>
    /// Builds the three Huffman tables once, from the compact (repeated)
    /// code-length descriptions below.
    /// </summary>
    static BlastDecoder()
    {
        // Repeated code lengths describing the literal code
        byte[] literalLengths =
        [
            11, 124, 8, 7, 28, 7, 188, 13, 76, 4, 10, 8, 12, 10, 12, 10, 8, 23, 8,
            9, 7, 6, 7, 8, 7, 6, 55, 8, 23, 24, 12, 11, 7, 9, 11, 12, 6, 7, 22, 5,
            7, 24, 6, 11, 9, 6, 7, 22, 7, 11, 38, 7, 9, 8, 25, 11, 8, 11, 9, 12,
            8, 12, 5, 38, 5, 38, 5, 11, 7, 5, 6, 21, 6, 10, 53, 8, 7, 24, 10, 27,
            44, 253, 253, 253, 252, 252, 252, 13, 12, 45, 12, 45, 12, 61, 12, 45,
            44, 173
        ];
        literalCode.Initialize(literalLengths);

        // Repeated code lengths describing length symbols 0..15
        byte[] lengthLengths =
        [
            2, 35, 36, 53, 38, 23
        ];
        lengthCode.Initialize(lengthLengths);

        // Repeated code lengths describing distance symbols 0..63
        byte[] distanceLengths =
        [
            2, 20, 53, 230, 247, 151, 248
        ];
        distanceCode.Initialize(distanceLengths);
    }

    /// <summary>
    /// Decompresses data stored in the PKWare Data Compression Library (DCL)
    /// format, i.e. the counterpart of that library's explode() function.
    /// (PKWare overused the "implode" verb: the DCL implode() format is
    /// completely different from, and incompatible with, the implode
    /// compression method supported by PKZIP.)
    ///
    /// The C documentation this was ported from advises opening files in
    /// binary mode (fopen(..., "rb") / fopen(..., "wb")) so the compressed
    /// data is not corrupted when read or written.
    /// </summary>
    /// <param name="inhow">Compressed input handed to the decoder state.</param>
    /// <param name="outhow">Output list handed to the decoder state.</param>
    /// <returns>
    /// The result of <see cref="Decomp"/> (0 on the end code, negative on a
    /// malformed stream), 1 if writing output failed, or 2 if decoding was
    /// aborted by an <see cref="IndexOutOfRangeException"/>.
    /// </returns>
    public static int Blast(byte[] inhow, List<byte> outhow)
    {
        // All input/output bookkeeping lives in the state object
        var state = new State(inhow, outhow);

        int result;
        try
        {
            result = Decomp(state);
        }
        catch (IndexOutOfRangeException)
        {
            // The C original bailed out with a jump here; an exception is the C# analogue
            result = 2;
        }

        // Nothing left to flush, or output already failed: report as-is
        if (result == 1 || state.Next == 0)
            return result;

        // Flush the partial window; a failed flush only overrides a success code
        bool flushed = state.ProcessOutput();
        if (!flushed && result == 0)
            result = 1;

        return result;
    }

    /// <summary>
    /// Decodes one PKWare Compression Library stream from the given state.
    /// </summary>
    /// <remarks>
    /// Header: the first byte is 0 when literals are stored uncoded and 1 when
    /// they are Huffman coded. The second byte is 4, 5, or 6, the number of
    /// extra bits in the distance code (base-2 logarithm of the dictionary
    /// size, minus six).
    ///
    /// Body: a sequence of literals and length/distance pairs terminated by
    /// an end code. Each item is preceded by one bit: 0 announces a literal,
    /// 1 announces a length/distance pair. A pair is a coded length followed
    /// by a coded distance, and means "repeat a string that occurred earlier
    /// in the uncompressed data".
    ///
    /// Uncoded literals are the next eight bits in normal stream bit order,
    /// so no bit reversal is needed. The same holds for the length extra bits
    /// and the distance extra bits.
    ///
    /// Literals are written straight to the output. A pair copies length
    /// bytes starting distance bytes back in the output. Distances reaching
    /// before the start of the output are not permitted.
    ///
    /// Overlapping copies (length greater than distance) are allowed and
    /// common: distance 1 with length 518 repeats the last byte 518 times,
    /// distance 4 with length 12 repeats the last four bytes three times.
    /// A plain forward byte-by-byte copy handles this correctly.
    /// </remarks>
    /// <param name="state">Input/output state to read bits from and write bytes to.</param>
    /// <returns>
    /// 0 when the end code is reached, -1 for an invalid literal flag, -2 for
    /// an invalid dictionary size, -3 for a distance too far back, or 1 when
    /// flushing a full output window fails.
    /// </returns>
    private static int Decomp(State state)
    {
        // Hands a completely filled window to the output and rewinds.
        // Returns false only when the output step reports failure.
        bool WindowHandled()
        {
            if (state.Next != MAXWIN)
                return true;

            if (!state.ProcessOutput())
                return false;

            state.Next = 0;
            state.First = false;
            return true;
        }

        // Header byte 1: non-zero when literals are Huffman coded
        int codedLiterals = state.Bits(8);
        if (codedLiterals > 1)
            return -1;

        // Header byte 2: log2(dictionary size) - 6
        int dictBits = state.Bits(8);
        if (dictBits < 4 || dictBits > 6)
            return -2;

        while (true)
        {
            if (state.Bits(1) == 0)
            {
                // Literal: decode (or read raw) one byte and emit it
                int literal = codedLiterals != 0 ? literalCode.Decode(state) : state.Bits(8);
                state.Output[state.Next++] = (byte)literal;
                if (!WindowHandled())
                    return 1;

                continue;
            }

            // Length: base value plus extra bits
            int lengthSymbol = lengthCode.Decode(state);
            int remaining = lengthBase[lengthSymbol] + state.Bits(lengthExtraBits[lengthSymbol]);
            if (remaining == 519)
                return 0; // end code

            // Distance: a length of 2 always uses 2 low bits, otherwise the header value
            int lowBits = remaining == 2 ? 2 : dictBits;
            uint distance = (uint)(distanceCode.Decode(state) << lowBits);
            distance += (uint)state.Bits(lowBits);
            distance++;
            if (state.First && distance > state.Next)
                return -3; // distance too far back

            // Copy in chunks so that neither source nor destination runs past the window
            do
            {
                int dst = (int)(state.OutputPtr + state.Next);
                int src = (int)(dst - distance);
                int chunk = MAXWIN;
                if (state.Next < distance)
                {
                    // Source lies in the previous window contents: wrap it around
                    src += MAXWIN;
                    chunk = (int)distance;
                }

                chunk -= (int)state.Next;
                if (chunk > remaining)
                    chunk = remaining;

                remaining -= chunk;
                state.Next += (uint)chunk;

                // Forward byte-by-byte copy; deliberately not a block copy so
                // that overlapping source/destination ranges replicate data
                do
                {
                    state.Output[dst++] = state.Output[src++];
                }
                while (--chunk != 0);

                if (!WindowHandled())
                    return 1;
            }
            while (remaining != 0);
        }
    }
}
270
+ }
0 commit comments