Skip to content

Commit fd5cafb

Browse files
author
yezea mm
committed
Add code
1 parent 06f429b commit fd5cafb

File tree

6 files changed

+775
-0
lines changed

6 files changed

+775
-0
lines changed

decoder_lzss.py

+61
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
"""
2+
Name: Hew Ye Zea
3+
Student ID: 29035546
4+
Date Created : 16/6/2020
5+
Date Last Edited : 26/6/2020
6+
"""
7+
import sys
8+
from huffman import *
9+
from elias import *
10+
from bitarray import bitarray
11+
12+
13+
def decode(filename):
    """
    Decode an LZSS-encoded file and write the decoded text to
    'output_decoder_lzss.txt'.

    The bit stream is consumed in this order:
      1. a header handed to HuffmanTree.buildTree, whose return value is
         used as the index of the next unread bit
      2. the number of formats, Elias coded
      3. the formats themselves; each starts with a flag bit:
           0 -> Elias-coded offset followed by Elias-coded match length
           1 -> a single Huffman-coded character

    :param filename: file to be decoded
    :return: None, the decoded string is written to 'output_decoder_lzss.txt'
    """
    bitcode = bitarray()
    # the context manager closes the input file even if reading fails
    with open(filename, 'rb') as file:
        bitcode.fromfile(file)

    # build a huffman tree to decode characters
    huffman_decoder = HuffmanTree()
    start_index = huffman_decoder.buildTree(bitcode)

    # decode the number of formats
    num_of_format, start_index = eliasDecode(bitcode, start_index)

    # characters are collected in a list and joined once at the end, which
    # avoids rebuilding an ever-growing string on every appended character
    decoded = []

    # decode all formats
    for _ in range(num_of_format):
        # flag bit "0" marks a back-reference, "1" marks a literal character
        is_match = not bitcode[start_index]
        start_index += 1

        if is_match:
            offset, start_index = eliasDecode(bitcode, start_index)  # decode the offset
            matched_length, start_index = eliasDecode(bitcode, start_index)  # decode match length

            offset_start = len(decoded) - offset

            # copy one character at a time: the source range may overlap the
            # characters being appended when matched_length > offset
            for j in range(matched_length):
                decoded.append(decoded[j + offset_start])
        else:
            char_ascii, start_index = huffman_decoder.decodeHuffman(bitcode, start_index)
            decoded.append(chr(char_ascii))  # decode the character

    with open('output_decoder_lzss.txt', 'w') as output:
        output.write(''.join(decoded))
55+
56+
57+
if __name__ == '__main__':
    # exactly one command-line argument is expected: the file to decode
    if len(sys.argv) != 2:
        sys.exit('usage: python decoder_lzss.py <encoded_file>')
    decode(sys.argv[1])
60+
61+
#decode('output_encoder_lzss.bin')

elias.py

+95
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
"""
2+
Name: Hew Ye Zea
3+
Student ID: 29035546
4+
Date Created : 11/6/2020
5+
Date Last Edited : 26/6/2020
6+
"""
7+
8+
from bitarray import bitarray
9+
10+
11+
def fromDecToBinary(num):
    """
    Build the binary representation of num, most significant bit first.

    Numbers that are not greater than 0 produce an empty bitarray.

    :param num: the number
    :return: a bitarray that represents num
    """
    bits = []
    remaining = num
    while remaining > 0:
        # the lowest bit is peeled off each round, so inserting at the front
        # leaves the most significant bit first
        bits.insert(0, remaining & 1)
        remaining >>= 1
    return bitarray(bits)
23+
24+
25+
def fromBinaryToDec(binary, start, end, flipBit=False):
    """
    Convert a portion of a bit sequence to its respective decimal value.

    The input sequence is only read, never modified. When flipBit is set the
    bit at ``start`` is treated as 1 for the conversion only.

    :param binary: indexable sequence of bits (e.g. a bitarray); each item is
                   interpreted by its truthiness
    :param start: starting index (inclusive)
    :param end: ending index (exclusive)
    :param flipBit: boolean, True = read the starting bit as 1
    :return: binary[start:end] as decimal value
    :raises ValueError: if the range start..end is empty
    """
    if start >= end:
        raise ValueError("cannot convert an empty bit range to a decimal value")

    value = 0
    for bit_index in range(start, end):
        # the forced leading 1 is applied here instead of writing it back
        # into the caller's sequence
        bit_is_set = binary[bit_index] or (flipBit and bit_index == start)
        value = (value << 1) | (1 if bit_is_set else 0)

    return value
44+
45+
46+
def eliasEncode(num):
    """
    Encode an integer using Elias Encoding method.

    The code is the binary form of num preceded by a chain of length codes:
    each length code stores (length of the following component - 1) in
    binary with its leading bit cleared to 0.

    :param num: the integer to be elias encoded
    :return: the elias encoded integer as a bitarray
    """
    # components are collected from the number outwards, i.e. in the reverse
    # of the order in which they appear in the final code
    components = [fromDecToBinary(num)]
    width = len(components[0])

    while width > 1:
        length_code = fromDecToBinary(width - 1)
        length_code[0] = 0  # a leading 0 marks this component as a length code
        components.append(length_code)
        width = len(length_code)

    result = bitarray('')
    for component in reversed(components):
        result += component

    return result
69+
70+
71+
def eliasDecode(code, current_i=0):
    """
    Decode one elias encoded integer starting at a given bit position.

    :param code: a bitstring that is elias encoded
    :param current_i: index of the first bit of the code to decode
    :return: a tuple (decoded integer, index of the first bit after the code)
    """
    # a leading "1" is the complete code for the smallest integer, 1
    if code[current_i]:
        return 1, current_i + 1

    # the leading "0" is a one-bit length code announcing a 2-bit component
    position = current_i + 1
    width = 2

    # a component starting with "0" is another length code: read it with its
    # first bit taken as 1 to obtain (width of the next component - 1)
    while not code[position]:
        next_width = fromBinaryToDec(code, position, position + width, True) + 1
        position += width
        width = next_width

    # the first component starting with "1" is the number itself
    stop = position + width
    decoded_num = fromBinaryToDec(code, position, stop)

    return decoded_num, stop

encoder_lzss.py

+127
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
"""
2+
Name: Hew Ye Zea
3+
Student ID: 29035546
4+
Date Created : 16/6/2020
5+
Date Last Edited : 26/6/2020
6+
"""
7+
import sys
8+
from huffman import *
9+
from elias import *
10+
from z_algo import *
11+
from bitarray import bitarray
12+
13+
14+
def encode(filename, w, l):
    """
    Encode a text file and write the result to 'output_encoder_lzss.bin'.

    The output is the header built by encode_header followed by the data
    built by encode_data.

    :param filename: file to be encoded
    :param w: window size, passed on to encode_data
    :param l: lookahead buffer size, passed on to encode_data
    :return: None, the encoded bits are written to 'output_encoder_lzss.bin'
    """
    # read file
    string = readfile(filename)

    # encode the string using huffman
    encoded, uniq_char = encodeHuffman(string)

    # encode header and data
    header = encode_header(uniq_char, encoded)
    data = encode_data(string, w, l, encoded)

    # concat header and data
    result = header + data

    # output result to file; the context manager closes it even if writing fails
    with open('output_encoder_lzss.bin', 'wb') as output:
        result.tofile(output)
33+
34+
35+
def encode_header(uniq_char, encoded):
    """
    Encode the header

    The header consists of:
      1. number of unique characters - elias coded
    followed, for every character that has a huffman code, by:
      2. ascii code of the character - fixed width, 7 bits
      3. length of its huffman code - elias coded
      4. the huffman code itself

    :param uniq_char: number of unique characters
    :param encoded: the huffman code of each unique character, indexed by
                    character code; None for characters without a code
    :return: an encoded header
    :raises ValueError: if a character code above 127 has a huffman code,
                        because it does not fit the 7-bit field
    """
    result = bitarray()
    result += eliasEncode(uniq_char)  # [1]

    for char_code, huffman_code in enumerate(encoded):
        if huffman_code is None:
            continue

        # a wider value would overflow the fixed 7-bit field and shift every
        # following bit of the header
        if char_code > 127:
            raise ValueError(
                "character code %d does not fit in the 7-bit header field" % char_code
            )

        code_bits = fromDecToBinary(char_code)
        # in-place extension avoids copying the whole header on every entry
        result += bitarray('0' * (7 - len(code_bits)))  # left-pad to 7 bits
        result += code_bits                             # [2]
        result += eliasEncode(len(huffman_code))        # [3]
        result += huffman_code                          # [4]

    return result
58+
59+
60+
def encode_data(string, w, l, encoded):
    """
    Encode the data part

    The data part is the elias coded number of formats followed by the
    formats themselves. Each format is either
      0 + elias coded offset + elias coded match length   (match length >= 3)
    or
      1 + huffman code of a single character               (match length < 3)

    :param string: string to be encoded
    :param w: window size
    :param l: lookahead buffer size
    :param encoded: the huffman code of each unique character, indexed by
                    character code
    :return: a bitstring that contains the encoded data
    """
    result = bitarray()
    number_of_formats = 0
    index = 0

    while index < len(string):
        # increment the number of formats
        number_of_formats += 1

        # run z algorithm on string to calculate the matched length
        z_arr = z_algo(string, w, l, index)

        maximum_length = -1
        maximum_offset = -1

        # obtain the maximum match length; ">=" keeps the last position among
        # equal lengths, which gives the smallest len(z_arr) - position below
        for position, match_length in enumerate(z_arr):
            if match_length >= maximum_length:
                maximum_offset = position
                maximum_length = match_length

        if maximum_length >= 3:
            # matched length >= 3 : 0 + offset [1] + matched length [2]
            # (in-place extension avoids copying the whole result each round)
            result += bitarray('0')
            result += eliasEncode(len(z_arr) - maximum_offset)  # [1]
            result += eliasEncode(maximum_length)               # [2]
            index += maximum_length  # skip over the matched characters
        else:
            # matched length < 3 : 1 + huffman code of the character [1]
            result += bitarray('1')
            result += encoded[ord(string[index])]  # [1]
            index += 1  # move on to the next character

    # the count is only known after the loop, so it is prepended at the end
    return eliasEncode(number_of_formats) + result
112+
113+
114+
def readfile(filename):
    """
    Read a whole text file

    :param filename: file to be read
    :return: the content of the file as a single string
    """
    # the context manager closes the file even if reading raises
    with open(filename, "r") as file:
        return file.read()
121+
122+
123+
if __name__ == '__main__':
    # three command-line arguments are expected: the input file, the window
    # size and the lookahead buffer size
    if len(sys.argv) != 4:
        sys.exit('usage: python encoder_lzss.py <input_file> <window_size> <lookahead_size>')
    _, filename, w, l = sys.argv
    encode(filename, int(w), int(l))
126+
127+

0 commit comments

Comments
 (0)