From 5c44459c3b28a9bd3283aaceab7c615f8020c531 Mon Sep 17 00:00:00 2001
From: Mark Adler <[email protected]>
Date: Tue, 17 Apr 2018 22:09:22 -0700
Subject: [PATCH] Fix a bug that can crash deflate on some input when using
 Z_FIXED.

This bug was reported by Danilo Ramos of Eideticom, Inc. It has
lain in wait 13 years before being found! The bug was introduced
in zlib 1.2.2.2, with the addition of the Z_FIXED option. That
option forces the use of fixed Huffman codes. For rare inputs with
a large number of distant matches, the pending buffer into which
the compressed data is written can overwrite the distance symbol
table which it overlays. That results in corrupted output due to
invalid distances, and can result in out-of-bound accesses,
crashing the application.

The fix here combines the distance buffer and literal/length
buffers into a single symbol buffer. Now three bytes of pending
buffer space are opened up for each literal or length/distance
pair consumed, instead of the previous two bytes. This assures
that the pending buffer cannot overwrite the symbol table, since
the maximum fixed code compressed length/distance is 31 bits, and
since there are four bytes of pending space for every three bytes
of symbol space.
---
 deflate.c | 74 ++++++++++++++++++++++++++++++++++++++++---------------
 deflate.h | 25 +++++++++----------
 trees.c   | 50 +++++++++++--------------------------
 3 files changed, 79 insertions(+), 70 deletions(-)

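The commit message makes two quantitative claims: a worst case of 31 bits per fixed-code length/distance pair, and four bytes of pending space for every three bytes of symbol space. As a quick check of that arithmetic (this sketch is editorial, not part of the patch; the constants are the fixed-code sizes from RFC 1951 and "n" stands for lit_bufsize):

#include <stdio.h>

int main(void) {
    /* Longest fixed Huffman codes: 8-bit length code plus 5 extra bits for
       lengths 131..257, and a 5-bit distance code plus 13 extra bits for
       distances 16385..32768. */
    int pair_bits = (8 + 5) + (5 + 13);
    printf("longest fixed length/distance pair: %d bits\n", pair_bits);

    /* Minimum gap between the bits written to pending_buf and the next
       unread 3-byte symbol in sym_buf, per the analysis comment added in
       deflate.c below: sym_buf starts 8*n bits in; just before the last
       code is written, 31*(n-2) bits have been written and 24*(n-2) bits
       consumed, leaving 8*n + 24*(n-2) - 31*(n-2) = n + 14 bits. */
    long n;
    for (n = 128; n <= 32768; n <<= 1)
        printf("lit_bufsize %5ld -> minimum gap %ld bits\n", n, n + 14);
    return 0;
}

At the smallest allowed lit_bufsize of 128 this gives the 142-bit margin quoted in the patch (139 once the three-bit fixed-code block header is counted).
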
diff --git a/deflate.c b/deflate.c
index 425babc00..19cba873a 100644
--- a/deflate.c
+++ b/deflate.c
@@ -255,11 +255,6 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
     int wrap = 1;
     static const char my_version[] = ZLIB_VERSION;
 
-    ushf *overlay;
-    /* We overlay pending_buf and d_buf+l_buf. This works since the average
-     * output size for (length,distance) codes is <= 24 bits.
-     */
-
     if (version == Z_NULL || version[0] != my_version[0] ||
         stream_size != sizeof(z_stream)) {
         return Z_VERSION_ERROR;
@@ -329,9 +324,47 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
 
     s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */
 
-    overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2);
-    s->pending_buf = (uchf *) overlay;
-    s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L);
+    /* We overlay pending_buf and sym_buf. This works since the average size
+     * for length/distance pairs over any compressed block is assured to be 31
+     * bits or less.
+     *
+     * Analysis: The longest fixed codes are a length code of 8 bits plus 5
+     * extra bits, for lengths 131 to 257. The longest fixed distance codes are
+     * 5 bits plus 13 extra bits, for distances 16385 to 32768. The longest
+     * possible fixed-codes length/distance pair is then 31 bits total.
+     *
+     * sym_buf starts one-fourth of the way into pending_buf. So there are
+     * three bytes in sym_buf for every four bytes in pending_buf. Each symbol
+     * in sym_buf is three bytes -- two for the distance and one for the
+     * literal/length. As each symbol is consumed, the pointer to the next
+     * sym_buf value to read moves forward three bytes. From that symbol, up to
+     * 31 bits are written to pending_buf. The closest the written pending_buf
+     * bits gets to the next sym_buf symbol to read is just before the last
+     * code is written. At that time, 31*(n-2) bits have been written, just
+     * after 24*(n-2) bits have been consumed from sym_buf. sym_buf starts at
+     * 8*n bits into pending_buf. (Note that the symbol buffer fills when n-1
+     * symbols are written.) The closest the writing gets to what is unread is
+     * then n+14 bits. Here n is lit_bufsize, which is 16384 by default, and
+     * can range from 128 to 32768.
+     *
+     * Therefore, at a minimum, there are 142 bits of space between what is
+     * written and what is read in the overlain buffers, so the symbols cannot
+     * be overwritten by the compressed data. That space is actually 139 bits,
+     * due to the three-bit fixed-code block header.
+     *
+     * That covers the case where either Z_FIXED is specified, forcing fixed
+     * codes, or when the use of fixed codes is chosen, because that choice
+     * results in a smaller compressed block than dynamic codes. That latter
+     * condition then assures that the above analysis also covers all dynamic
+     * blocks. A dynamic-code block will only be chosen to be emitted if it has
+     * fewer bits than a fixed-code block would for the same set of symbols.
+     * Therefore its average symbol length is assured to be less than 31. So
+     * the compressed data for a dynamic block also cannot overwrite the
+     * symbols from which it is being constructed.
+     */
+
+    s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 4);
+    s->pending_buf_size = (ulg)s->lit_bufsize * 4;
 
     if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL ||
         s->pending_buf == Z_NULL) {
@@ -340,8 +373,12 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
         deflateEnd (strm);
         return Z_MEM_ERROR;
     }
-    s->d_buf = overlay + s->lit_bufsize/sizeof(ush);
-    s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize;
+    s->sym_buf = s->pending_buf + s->lit_bufsize;
+    s->sym_end = (s->lit_bufsize - 1) * 3;
+    /* We avoid equality with lit_bufsize*3 because of wraparound at 64K
+     * on 16 bit machines and because stored blocks are restricted to
+     * 64K-1 bytes.
+     */
 
     s->level = level;
     s->strategy = strategy;
@@ -552,7 +589,7 @@ int ZEXPORT deflatePrime (strm, bits, value)
 
     if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
     s = strm->state;
-    if ((Bytef *)(s->d_buf) < s->pending_out + ((Buf_size + 7) >> 3))
+    if (s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3))
         return Z_BUF_ERROR;
     do {
         put = Buf_size - s->bi_valid;
@@ -1113,7 +1150,6 @@ int ZEXPORT deflateCopy (dest, source)
 #else
     deflate_state *ds;
     deflate_state *ss;
-    ushf *overlay;
 
 
     if (deflateStateCheck(source) || dest == Z_NULL) {
@@ -1133,8 +1169,7 @@ int ZEXPORT deflateCopy (dest, source)
     ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte));
     ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos));
     ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos));
-    overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2);
-    ds->pending_buf = (uchf *) overlay;
+    ds->pending_buf = (uchf *) ZALLOC(dest, ds->lit_bufsize, 4);
 
     if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL ||
         ds->pending_buf == Z_NULL) {
@@ -1148,8 +1183,7 @@ int ZEXPORT deflateCopy (dest, source)
     zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
 
     ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
-    ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush);
-    ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize;
+    ds->sym_buf = ds->pending_buf + ds->lit_bufsize;
 
     ds->l_desc.dyn_tree = ds->dyn_ltree;
     ds->d_desc.dyn_tree = ds->dyn_dtree;
@@ -1925,7 +1959,7 @@ local block_state deflate_fast(s, flush)
         FLUSH_BLOCK(s, 1);
         return finish_done;
     }
-    if (s->last_lit)
+    if (s->sym_next)
         FLUSH_BLOCK(s, 0);
     return block_done;
 }
@@ -2056,7 +2090,7 @@ local block_state deflate_slow(s, flush)
         FLUSH_BLOCK(s, 1);
         return finish_done;
     }
-    if (s->last_lit)
+    if (s->sym_next)
         FLUSH_BLOCK(s, 0);
     return block_done;
 }
@@ -2131,7 +2165,7 @@ local block_state deflate_rle(s, flush)
         FLUSH_BLOCK(s, 1);
         return finish_done;
     }
-    if (s->last_lit)
+    if (s->sym_next)
         FLUSH_BLOCK(s, 0);
     return block_done;
 }
@@ -2170,7 +2204,7 @@ local block_state deflate_huff(s, flush)
         FLUSH_BLOCK(s, 1);
         return finish_done;
     }
-    if (s->last_lit)
+    if (s->sym_next)
         FLUSH_BLOCK(s, 0);
     return block_done;
 }
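
To summarize the allocation change above in one place: pending_buf is now four times lit_bufsize bytes, and the symbol buffer simply occupies its upper three quarters. The following is a minimal editorial sketch of that layout, using malloc in place of zlib's ZALLOC and plain C types in place of uchf/ulg; the struct and function names here are illustrative, not zlib source:

#include <stdlib.h>

typedef struct {
    unsigned char *pending_buf;      /* compressed bytes accumulate from offset 0 */
    unsigned long  pending_buf_size; /* 4 * lit_bufsize */
    unsigned char *sym_buf;          /* 3-byte symbols, upper 3/4 of pending_buf */
    unsigned int   lit_bufsize;
    unsigned int   sym_next;         /* next free byte in sym_buf */
    unsigned int   sym_end;          /* buffer is full when sym_next reaches this */
} overlay_layout;

static int overlay_init(overlay_layout *s, unsigned int lit_bufsize) {
    s->lit_bufsize = lit_bufsize;
    s->pending_buf = (unsigned char *)malloc((size_t)lit_bufsize * 4);
    if (s->pending_buf == NULL) return -1;
    s->pending_buf_size = (unsigned long)lit_bufsize * 4;
    s->sym_buf = s->pending_buf + lit_bufsize;  /* one-fourth of the way in */
    s->sym_next = 0;
    s->sym_end = (lit_bufsize - 1) * 3;         /* stop one symbol early, as in the patch */
    return 0;
}
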
diff --git a/deflate.h b/deflate.h
index 23ecdd312..d4cf1a98b 100644
--- a/deflate.h
+++ b/deflate.h
@@ -217,7 +217,7 @@ typedef struct internal_state {
     /* Depth of each subtree used as tie breaker for trees of equal frequency
      */
 
-    uchf *l_buf; /* buffer for literals or lengths */
+    uchf *sym_buf; /* buffer for distances and literals/lengths */
 
     uInt lit_bufsize;
     /* Size of match buffer for literals/lengths. There are 4 reasons for
@@ -239,13 +239,8 @@ typedef struct internal_state {
      * - I can't count above 4
      */
 
-    uInt last_lit; /* running index in l_buf */
-
-    ushf *d_buf;
-    /* Buffer for distances. To simplify the code, d_buf and l_buf have
-     * the same number of elements. To use different lengths, an extra flag
-     * array would be necessary.
-     */
+    uInt sym_next; /* running index in sym_buf */
+    uInt sym_end; /* symbol table full when sym_next reaches this */
 
     ulg opt_len; /* bit length of current block with optimal trees */
     ulg static_len; /* bit length of current block with static trees */
@@ -325,20 +320,22 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf,
 
 # define _tr_tally_lit(s, c, flush) \
   { uch cc = (c); \
-    s->d_buf[s->last_lit] = 0; \
-    s->l_buf[s->last_lit++] = cc; \
+    s->sym_buf[s->sym_next++] = 0; \
+    s->sym_buf[s->sym_next++] = 0; \
+    s->sym_buf[s->sym_next++] = cc; \
     s->dyn_ltree[cc].Freq++; \
-    flush = (s->last_lit == s->lit_bufsize-1); \
+    flush = (s->sym_next == s->sym_end); \
   }
 # define _tr_tally_dist(s, distance, length, flush) \
   { uch len = (uch)(length); \
     ush dist = (ush)(distance); \
-    s->d_buf[s->last_lit] = dist; \
-    s->l_buf[s->last_lit++] = len; \
+    s->sym_buf[s->sym_next++] = dist; \
+    s->sym_buf[s->sym_next++] = dist >> 8; \
+    s->sym_buf[s->sym_next++] = len; \
     dist--; \
     s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
     s->dyn_dtree[d_code(dist)].Freq++; \
-    flush = (s->last_lit == s->lit_bufsize-1); \
+    flush = (s->sym_next == s->sym_end); \
 #else
 # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
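
The macros above and the compress_block() change below are two halves of the same three-byte encoding: distance low byte, distance high byte, then the literal or length. A small editorial sketch of the round trip, with hypothetical helper names (zlib itself open-codes this in the macros and in compress_block()):

/* Append one symbol: dist == 0 means lc is a literal byte, otherwise lc is
   the match length minus MIN_MATCH and dist is the match distance. */
static void sym_put(unsigned char *sym_buf, unsigned *sym_next,
                    unsigned dist, unsigned lc) {
    sym_buf[(*sym_next)++] = (unsigned char)dist;        /* distance, low byte */
    sym_buf[(*sym_next)++] = (unsigned char)(dist >> 8); /* distance, high byte */
    sym_buf[(*sym_next)++] = (unsigned char)lc;          /* literal or length */
}

/* Read one symbol back, exactly as compress_block() does below. */
static void sym_get(const unsigned char *sym_buf, unsigned *sx,
                    unsigned *dist, int *lc) {
    *dist = sym_buf[(*sx)++] & 0xff;
    *dist += (unsigned)(sym_buf[(*sx)++] & 0xff) << 8;
    *lc = sym_buf[(*sx)++];
}
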
diff --git a/trees.c b/trees.c
index 4f4a65011..decaeb7c3 100644
--- a/trees.c
+++ b/trees.c
@@ -416,7 +416,7 @@ local void init_block(s)
 
     s->dyn_ltree[END_BLOCK].Freq = 1;
     s->opt_len = s->static_len = 0L;
-    s->last_lit = s->matches = 0;
+    s->sym_next = s->matches = 0;
 }
 
 #define SMALLEST 1
@@ -948,7 +948,7 @@ void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last)
 
     Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",
             opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,
-            s->last_lit));
+            s->sym_next / 3));
 
     if (static_lenb <= opt_lenb) opt_lenb = static_lenb;
 
@@ -1017,8 +1017,9 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc)
     unsigned dist; /* distance of matched string */
     unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */
 {
-    s->d_buf[s->last_lit] = (ush)dist;
-    s->l_buf[s->last_lit++] = (uch)lc;
+    s->sym_buf[s->sym_next++] = dist;
+    s->sym_buf[s->sym_next++] = dist >> 8;
+    s->sym_buf[s->sym_next++] = lc;
     if (dist == 0) {
         /* lc is the unmatched char */
         s->dyn_ltree[lc].Freq++;
@@ -1033,30 +1034,7 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc)
         s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++;
         s->dyn_dtree[d_code(dist)].Freq++;
     }
-
-#ifdef TRUNCATE_BLOCK
-    /* Try to guess if it is profitable to stop the current block here */
-    if ((s->last_lit & 0x1fff) == 0 && s->level > 2) {
-        /* Compute an upper bound for the compressed length */
-        ulg out_length = (ulg)s->last_lit*8L;
-        ulg in_length = (ulg)((long)s->strstart - s->block_start);
-        int dcode;
-        for (dcode = 0; dcode < D_CODES; dcode++) {
-            out_length += (ulg)s->dyn_dtree[dcode].Freq *
-                (5L+extra_dbits[dcode]);
-        }
-        out_length >>= 3;
-        Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ",
-               s->last_lit, in_length, out_length,
-               100L - out_length*100L/in_length));
-        if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1;
-    }
-#endif
-    return (s->last_lit == s->lit_bufsize-1);
-    /* We avoid equality with lit_bufsize because of wraparound at 64K
-     * on 16 bit machines and because stored blocks are restricted to
-     * 64K-1 bytes.
-     */
+    return (s->sym_next == s->sym_end);
 }
 
 /* ===========================================================================
@@ -1069,13 +1047,14 @@ local void compress_block(s, ltree, dtree)
 {
     unsigned dist; /* distance of matched string */
     int lc; /* match length or unmatched char (if dist == 0) */
-    unsigned lx = 0; /* running index in l_buf */
+    unsigned sx = 0; /* running index in sym_buf */
     unsigned code; /* the code to send */
     int extra; /* number of extra bits to send */
 
-    if (s->last_lit != 0) do {
-        dist = s->d_buf[lx];
-        lc = s->l_buf[lx++];
+    if (s->sym_next != 0) do {
+        dist = s->sym_buf[sx++] & 0xff;
+        dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8;
+        lc = s->sym_buf[sx++];
         if (dist == 0) {
             send_code(s, lc, ltree); /* send a literal byte */
             Tracecv(isgraph(lc), (stderr," '%c' ", lc));
@@ -1100,11 +1079,10 @@ local void compress_block(s, ltree, dtree)
             }
         } /* literal or match pair ? */
 
-        /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
-        Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx,
-               "pendingBuf overflow");
+        /* Check that the overlay between pending_buf and sym_buf is ok: */
+        Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow");
 
-    } while (lx < s->last_lit);
+    } while (sx < s->sym_next);
 
     send_code(s, END_BLOCK, ltree);
 }
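
For context, the configuration affected by the bug is a deflate stream created with the Z_FIXED strategy. A minimal, hedged example of setting one up through zlib's public API (error handling trimmed; providing a large enough output buffer is the caller's responsibility):

#include <string.h>
#include "zlib.h"

/* Compress in[0..in_len-1] into out[] using fixed Huffman codes (Z_FIXED),
   the code path the analysis above is about. */
int compress_fixed(const unsigned char *in, unsigned long in_len,
                   unsigned char *out, unsigned long *out_len) {
    z_stream strm;
    int ret;

    memset(&strm, 0, sizeof(strm));          /* zalloc/zfree/opaque left at Z_NULL */
    ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
                       15, 8, Z_FIXED);      /* Z_FIXED forces fixed Huffman codes */
    if (ret != Z_OK) return ret;

    strm.next_in = (Bytef *)in;
    strm.avail_in = (uInt)in_len;
    strm.next_out = out;
    strm.avail_out = (uInt)*out_len;
    ret = deflate(&strm, Z_FINISH);          /* Z_STREAM_END when all output fit */
    *out_len = strm.total_out;
    deflateEnd(&strm);
    return ret == Z_STREAM_END ? Z_OK : Z_BUF_ERROR;
}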