Skip to content

Commit 231493e

Browse files
committed
implement limitation of code length
1 parent 82fa770 commit 231493e

File tree

2 files changed

+74
-7
lines changed

2 files changed

+74
-7
lines changed

huff_dec.c

+9-3
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ bool huff_gen_dec(uint8_t code_len[restrict 16], uint8_t symbols[restrict],
3535
assert(0 < max_bits && max_bits <= 16);
3636
assert(0 < min_bits && min_bits <= 16);
3737

38-
uint16_t num_entries = 1 << max_bits;
38+
uint32_t num_entries = 1 << max_bits;
3939
assert(num_entries != 0);
4040

4141
decoder->max_bits = max_bits;
@@ -66,6 +66,9 @@ bool huff_gen_dec(uint8_t code_len[restrict 16], uint8_t symbols[restrict],
6666
}
6767
}
6868

69+
/* if the kraft sum is less than 1 then index is less than num_entries
70+
* this isn't really an error, there are just some checks in the decoding
71+
* missing. */
6972
if (index != num_entries) {
7073
fprintf(stderr, "Invalid decode header. Missing entries in decode table\n");
7174
free(decoder->entries);
@@ -141,14 +144,16 @@ bool huff_decode_file(const struct huff_dec * restrict decoder, size_t num_sym,
141144
to push back bits in bit_reader and extend decoding to decode at any bit
142145
position. */
143146

144-
uint16_t code;
147+
uint32_t code;
145148
uint8_t max_bits = decoder->max_bits;
146149
const uint16_t mask = (1 << max_bits) - 1;
147150
uint16_t *table = decoder->entries;
148151

149152
/* FIXME special case with less than max_bits in the data stream. */
150153
/* FIXME handle EOF correctly */
151-
bit_reader_next_bits(reader, &code, max_bits);
154+
uint16_t code16;
155+
bit_reader_next_bits(reader, &code16, max_bits);
156+
code = code16;
152157
/*
153158
fprintf(stderr, "Error while reading input\n");
154159
return false;
@@ -177,6 +182,7 @@ bool huff_decode_file(const struct huff_dec * restrict decoder, size_t num_sym,
177182
return false;
178183
}*/
179184

185+
/* FIXME this can overflow when code is uint16_t */
180186
code = (code << length) | tmp;
181187
}
182188

huff_enc.c

+65-4
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ static bool gen_code_lengths(uint16_t num_sym, const uint32_t freq[restrict],
3535
static void gen_canonical_codes(uint16_t num_codes,
3636
struct huff_code codes[restrict],
3737
struct huff_enc_info * restrict info);
38+
static bool limit_length(uint16_t num_codes, struct huff_code codes[restrict],
39+
uint8_t limit);
3840

3941
bool huff_gen_enc(const uint32_t freq[restrict 256],
4042
struct huff_enc * restrict encoder,
@@ -64,7 +66,7 @@ bool huff_gen_enc(const uint32_t freq[restrict 256],
6466

6567
/* generate huffman code lengths */
6668
gen_code_lengths(num_sym, freq, codes);
67-
//limit_length(num_sym, codes);
69+
limit_length(num_sym, codes, 16);
6870

6971
/* generate canonical huffman codes */
7072
gen_canonical_codes(num_sym, codes, info);
@@ -156,9 +158,6 @@ static void set_len(const struct node *node, uint8_t depth)
156158
{
157159
assert(node != NULL);
158160

159-
if (depth > 16) {
160-
fprintf(stderr, "Warning: Code length over 16!\n");
161-
}
162161
if (node->left == NULL && node->right == NULL) {
163162
assert(node->code != NULL);
164163
node->code->code_len = depth;
@@ -249,6 +248,68 @@ static bool gen_code_lengths(uint16_t num_sym, const uint32_t freq[restrict],
249248
return true;
250249
}
251250

251+
static int len_cmp(const void *left, const void *right)
252+
{
253+
return ((const struct huff_code *)left)->code_len -
254+
((const struct huff_code *)right)->code_len;
255+
}
256+
257+
static bool limit_length(uint16_t num_codes, struct huff_code codes[restrict],
258+
uint8_t limit)
259+
{
260+
assert(num_codes > 0);
261+
assert(codes != NULL);
262+
assert(limit > 1);
263+
264+
// sort by code_len
265+
qsort(codes, num_codes, sizeof(struct huff_code), len_cmp);
266+
267+
const uint32_t n = 1 << limit;
268+
269+
uint32_t kraft_sum = 0;
270+
271+
for (uint16_t i = 0; i < num_codes; i++) {
272+
if (codes[i].code_len > limit)
273+
codes[i].code_len = limit;
274+
kraft_sum += n >> codes[i].code_len;
275+
}
276+
277+
if (kraft_sum > n) {
278+
for (uint16_t i = 0; i < num_codes; i++) {
279+
uint16_t index = num_codes - i - 1;
280+
if (codes[index].code_len == limit)
281+
continue;
282+
283+
if (kraft_sum <= n)
284+
break;
285+
286+
codes[index].code_len++;
287+
kraft_sum -= n >> codes[index].code_len;
288+
}
289+
}
290+
291+
if (kraft_sum < n) {
292+
uint32_t kraft_diff = n - kraft_sum;
293+
294+
for (uint16_t i = 0; i < num_codes; i++) {
295+
if (codes[i].code_len == 1)
296+
continue;
297+
298+
if (n >> (codes[i].code_len) > kraft_diff)
299+
continue;
300+
301+
if (kraft_sum == n || kraft_diff == 0)
302+
break;
303+
304+
kraft_sum += n >> codes[i].code_len;
305+
kraft_diff -= n >> codes[i].code_len;
306+
codes[i].code_len--;
307+
}
308+
}
309+
310+
return true;
311+
}
312+
252313
static void gen_canonical_codes(uint16_t num_codes,
253314
struct huff_code codes[restrict],
254315
struct huff_enc_info * restrict info) {

0 commit comments

Comments
 (0)