Skip to content

Commit d879e43

Browse files
authored
Create huffman_compress.cpp
compresses files
1 parent 6015b38 commit d879e43

File tree

1 file changed

+137
-0
lines changed

1 file changed

+137
-0
lines changed

huffman_compress.cpp

+137
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
/*
2+
*NO Error checking or handling whatsoever in this code
3+
*use at your own risk
4+
*@author shivam gupta
5+
*/
6+
7+
#include <cstdio>
8+
#include <iostream>
9+
#include <algorithm>
10+
#include <queue>
11+
#include <string>
12+
13+
#define OUT_OF_RANGE 256 //256 is out of range of a byte and is used in place of null
14+
15+
using namespace std;
16+
17+
/**
 * Node of the Huffman tree.
 *
 * ch    - byte value (0-255) for a leaf; this file stores OUT_OF_RANGE (256)
 *         in internal nodes to mark "no byte here".
 * freq  - weight of the node; for a merged node the caller passes the sum of
 *         the two children's weights.
 * left, right - child pointers. The node does NOT own them: copying a node
 *         copies the pointers (shallow copy) and destroying a node frees
 *         nothing.
 */
struct treeNode {
    int ch;
    long long int freq;
    treeNode *left, *right;

    treeNode(int c, long long int f, treeNode *l, treeNode *r)
        : ch(c), freq(f), left(l), right(r) {}

    // Copy construction/assignment are deliberately left to the compiler.
    // The implicit member-wise copy is exactly what a hand-written version
    // would do, and declaring only a copy constructor would make the implicit
    // copy assignment (used by std::priority_queue) a deprecated feature.

    /**
     * Ordering is intentionally inverted: a node compares "less" when its
     * frequency is HIGHER. std::priority_queue keeps the greatest element on
     * top, so with this comparison top() is the node with the lowest freq.
     */
    bool operator< (const treeNode &rhs) const {
        return freq > rhs.freq;
    }
};
27+
28+
/**
 * Walks the tree depth-first and records the bit string of every leaf.
 *
 * @param root  subtree to walk; a null pointer is ignored.
 * @param s     path from the top of the tree down to `root`, written as
 *              '0' (went left) and '1' (went right) characters. It is used
 *              as scratch space during the walk and holds its incoming value
 *              again when the function returns.
 * @param code  table indexed by byte value; code[ch] receives the path of
 *              each node whose ch is not OUT_OF_RANGE. No bounds check is
 *              done, so it must have an entry for every such ch.
 *
 * Every assigned code is also echoed to stdout as "<byte> <code>".
 */
void codegenerator(treeNode *root, string &s, vector<string> &code) {
    if(!root) {
        return;
    }
    if(root->ch != OUT_OF_RANGE) { // node carries a byte value
        // A byte-carrying node reached with an empty path is the top of a
        // one-symbol tree. An empty code would encode that symbol in zero
        // bits, so it gets the one-bit code "0" instead.
        code[root->ch] = s.empty() ? string("0") : s;
        cout << root->ch << " " << code[root->ch] << '\n';
    }

    s.push_back('0');               // descend left
    codegenerator(root->left, s, code);
    s.pop_back();

    s.push_back('1');               // descend right
    codegenerator(root->right, s, code);
    s.pop_back();
}
43+
44+
/**
 * Packs single bits into bytes and writes the bytes to a C stream.
 *
 * Bits are shifted in from the right, so in a complete byte the first bit
 * written ends up in the most significant position. The stream is borrowed:
 * this class never closes it and does not check fwrite's result.
 */
class BitWriter {
    int bit_counter;         // number of bits collected in curr_byte so far (0-7)
    unsigned char curr_byte; // bits collected for the byte not yet written
    FILE *fp;                // destination stream (not owned)
public:
    BitWriter(FILE *f) : bit_counter(0), curr_byte(0), fp(f) {}

    /**
     * Appends one bit. Only the lowest bit of `bit` is used, so a stray
     * value other than 0/1 cannot overwrite bits collected earlier.
     * The byte is written to the stream as soon as 8 bits are collected.
     */
    void write(unsigned char bit) {
        curr_byte = static_cast<unsigned char>((curr_byte << 1) | (bit & 1u));
        if(++bit_counter == 8) {
            fwrite(&curr_byte, 1, 1, fp);
            bit_counter = 0;
            curr_byte = 0;
        }
    }

    /**
     * Writes the pending partial byte, if there is one. The collected bits
     * sit in the low-order positions of that byte (it is not left-aligned);
     * the remaining high-order bits are zero.
     *
     * The state is reset afterwards, so calling flush() again writes nothing
     * and a later write() starts a fresh byte.
     */
    void flush() {
        if(bit_counter > 0) {
            fwrite(&curr_byte, 1, 1, fp);
            bit_counter = 0;
            curr_byte = 0;
        }
    }
};
65+
66+
int main() {
67+
FILE *inp = fopen("binary_file.jpg", "rb");
68+
FILE *out = fopen("compressed.myformat", "wb");
69+
70+
vector<long long int> freq_store(256, 0);
71+
vector<string> code(256);
72+
vector<int> store;
73+
74+
unsigned char buffer[1];
75+
while(fread(buffer, 1, 1, inp)>0) { //arguments : character address to read to, size(in bytes) of each element to read, number of such elements to read, file desc
76+
if(freq_store[(int)buffer[0]] == 0 ) { //storing all the bytes present to push into queue later
77+
store.push_back((int)buffer[0]);
78+
}
79+
freq_store[(int)buffer[0]]++;
80+
}
81+
rewind(inp);
82+
83+
for(int i = 0; i<store.size(); i++) {
84+
cout << store[i] << " " << freq_store[store[i]] << endl;
85+
}
86+
87+
priority_queue<treeNode> pq;
88+
for(int i = 0; i<store.size(); i++) { //pushing all the stored bytes into the priority queue
89+
pq.push(treeNode(store[i], freq_store[(int)store[i]], NULL, NULL));
90+
}
91+
92+
treeNode *root;
93+
94+
while(pq.size() > 1) { //need to extract 2 elements
95+
treeNode t1 = pq.top();
96+
pq.pop();
97+
treeNode t2 = pq.top();
98+
pq.pop();
99+
100+
treeNode *l = new treeNode(t1);
101+
treeNode *r = new treeNode(t2);
102+
103+
root = new treeNode(OUT_OF_RANGE, l->freq+r->freq, l, r);
104+
105+
pq.push(*root);
106+
}
107+
108+
string s = "";
109+
codegenerator(root, s, code);
110+
111+
for(int i = 0; i<store.size(); i++) { //all this is stored as text, so each character takes 1 byte, which is too much space(5-6 KB max)
112+
fprintf(out, "%s %d ", code[store[i]].c_str(), store[i]);
113+
}
114+
fprintf(out, "| ");
115+
116+
long long int bitStreamLength = 0; // after | store the bit stream length in the file itself
117+
while(fread(buffer, 1, 1, inp)>0) {
118+
bitStreamLength += code[(int)buffer[0]].length();
119+
}
120+
rewind(inp);
121+
122+
fprintf(out, " %lld ", bitStreamLength); //actually printing the size of the bit stream to the file, binary data starts immediately after |
123+
124+
BitWriter bw(out);
125+
126+
while(fread(buffer, 1, 1, inp) > 0) {
127+
string cd = code[(int)buffer[0]];
128+
for(int i = 0; i<cd.length(); i++) {
129+
bw.write(cd[i]-'0'); //gives 0 or 1 integer value to write function instead of chars '0' or '1'
130+
}
131+
}
132+
rewind(inp);
133+
134+
bw.flush(); //write any remaining bits in buffer
135+
136+
return 0;
137+
}

0 commit comments

Comments
 (0)