-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathz_algo.py
148 lines (123 loc) · 5.31 KB
/
z_algo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
"""
Name: Hew Ye Zea
Student ID: 29035546
Date Created : 13/6/2020
Date Last Edited : 26/6/2020
"""
def z_algo(string, w_size, l_size, buffer_start):
"""
Z algorithm
:param string: the string
:param w_size: window size
:param l_size: lookahead buffer size
:param buffer_start: the starting index of buffer
"""
def z_for_buffer(buffer_size):
"""
run Z algorithm for characters in the buffer / lookahead section
:param buffer_size: size of the buffer / lookahead section
"""
# initialise z array
z_buffer_arr = [0] * buffer_size
# initialise first position of z array
z_buffer_arr[0] = buffer_size
left = -1
right = -1
for k in range(1, buffer_size):
# case 1 - no z box
if k > right:
count = 0
# character a index : buffer_start + k + count
# character b index : buffer_start + count
while (k + count) < buffer_size and string[buffer_start + k + count] == string[buffer_start + count]:
count += 1
left = k
right = k + count - 1
z_buffer_arr[k] = count
else:
remain = right - k + 1
K = k - left
# case 2a
if z_buffer_arr[K] < remain:
z_buffer_arr[k] = z_buffer_arr[K]
# case 2b
elif z_buffer_arr[K] > remain:
z_buffer_arr[k] = remain
# case 2c
else:
count = 0
while (k + remain + count) < buffer_size and string[buffer_start + right + 1 + count] == string[buffer_start + right - k + 1 + count]:
count += 1
left = k
right = k + count - 1
z_buffer_arr[k] = count
return z_buffer_arr
def z_for_window():
"""
Z algorithm for lzss, to calculate matched length of window ( dictionary ) & lookahead
:return:
"""
# initialise buffer size , this is to make sure the buffer size is within len(string)
buffer_size = min(l_size, len(string) - buffer_start)
# initialise the size of the window , this is to prevent negative index
window_start = max(0, buffer_start - w_size)
window_size = buffer_start - window_start # calculate the size of window / dictionary
z_buffer_arr = z_for_buffer(buffer_size) # run Z for characters within the lookahead section
# initialise z array for characters within the window
z_window_arr = [0] * window_size
# total length ( buffer size + window size )
total_length = buffer_size + window_size
left = -1
right = -1
for k in range(window_size):
# case 1 - no z box
if k > right:
# compare characters and compute matched length
# z_window_arr[k] stores the macthed length
z_window_arr[k] = lz_compare(total_length,buffer_start,window_size,buffer_size,k,z_window_arr[k])
left = k
right = z_window_arr[k] + k - 1
else:
remain = right - k + 1
K = k - left
# case 2a
if z_buffer_arr[K] < remain:
z_window_arr[k] = z_buffer_arr[K]
# case 2b - z box exists
elif z_window_arr[K] > remain:
z_window_arr[k] = remain
# calculate matched length outside z box
z_window_arr[k] = lz_compare(total_length, buffer_start, window_size, buffer_size, k,
z_window_arr[k])
left = k
right = z_window_arr[k] + k - 1
# case 2c
else:
z_window_arr[k] = lz_compare(total_length, buffer_start, window_size, buffer_size, k,
z_window_arr[k])
left = k
right = z_window_arr[k] + k - 1
return z_window_arr
def lz_compare(total_length,buffer_start,window_size,buffer_size,k,count):
"""
Compare the characters of two index & compute matched length
- matched length cannot exceed buffer size
:param total_length: size of dictionary/window & lookahead buffer
:param buffer_start: starting index of buffer
:param window_size: size of dictionary
:param buffer_size: size of buffer/lookahead
:param k: current iteration
:param count: matched length
:return:
"""
index_a = k + count + buffer_start - window_size
index_b = count + buffer_start
within_string_length = index_a < len(string) and index_b < len(string) # make sure index is within len(string)
while (count + k ) < total_length and count < buffer_size \
and within_string_length and string[index_a] == string[index_b]:
count += 1
index_a = k + count + buffer_start - window_size
index_b = count + buffer_start
within_string_length = index_a < len(string) and index_b < len(string)
return count
return z_for_window()