Skip to content

Commit fa53e09

Browse files
committed
Add AdaBoostStump Algorithm
1 parent 3072c2e commit fa53e09

8 files changed

+1218
-256
lines changed

FukuML/AdaBoostStump.py

+181
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
#encoding=utf8
2+
3+
import os
4+
import numpy as np
5+
import FukuML.Utility as utility
6+
import FukuML.MLBase as ml
7+
import FukuML.DecisionStump as decision_stump
8+
9+
10+
class BinaryClassifier(ml.Learner):
    """AdaBoost binary classifier built from decision stumps.

    Every boosting round fits one ``decision_stump.BinaryClassifier`` on the
    training set using the current per-sample weights ``u``, then derives the
    stump's vote weight (alpha) and the re-scaled sample weights for the next
    round.  A prediction is the sign of the alpha-weighted sum of the stump
    predictions.
    """

    def __init__(self):
        """Set every attribute to its empty/default value."""

        self.status = 'empty'
        self.train_X = []
        self.train_Y = []
        # W is only allocated by init_W and handed back by train(); the
        # boosting logic in this class never updates it.
        self.W = []
        self.data_num = 0
        self.data_demension = 0
        self.test_X = []
        self.test_Y = []
        self.feature_transform_mode = ''
        self.feature_transform_degree = 1

        # Number of boosting rounds (one decision stump is trained per round).
        self.run_t = 40
        # Trained stumps and their vote weights, index-aligned.
        self.weak_learner = []
        self.alpha = []
        # Not used by any method of this class.
        self.temp_train_X = []

    @staticmethod
    def _default_dataset_path(file_name):
        """Return the normalized path of ``dataset/<file_name>`` next to this module."""

        module_dir = os.path.join(os.getcwd(), os.path.dirname(__file__))
        return os.path.normpath(os.path.join(module_dir, "dataset/" + file_name))

    @staticmethod
    def _format_features(x):
        """Return all features of ``x`` except the first as a space-separated string.

        This is the text form handed to the stump's ``prediction`` method.
        The first entry is skipped exactly as the original code did
        (presumably it is the constant x0 column added by the dataset
        loader -- TODO confirm).

        A generator is used instead of ``np.array(map(str, x))`` because on
        Python 3 ``map`` is lazy and NumPy wraps it as a 0-d object array
        that cannot be sliced.
        """

        return ' '.join(str(value) for value in x[1:])

    def load_train_data(self, input_data_file=''):
        """Load the training set.

        Args:
            input_data_file: path of the data file; an empty string selects
                the bundled ``dataset/decision_stump_train.dat``.

        Returns:
            The tuple ``(train_X, train_Y)``.  When the given path is not a
            file, a message is printed and the current (unchanged) values
            are returned.
        """

        if input_data_file == '':
            input_data_file = self._default_dataset_path("decision_stump_train.dat")
        elif not os.path.isfile(input_data_file):
            print("Please make sure input_data_file path is correct.")
            return self.train_X, self.train_Y

        self.train_X, self.train_Y = utility.DatasetLoader.load(input_data_file)

        # Only flag the data as loaded once loading really succeeded, so a bad
        # path cannot let init_W run against an empty training set.
        self.status = 'load_train_data'

        return self.train_X, self.train_Y

    def load_test_data(self, input_data_file=''):
        """Load the test set and run it through the configured feature transform.

        Args:
            input_data_file: path of the data file; an empty string selects
                the bundled ``dataset/decision_stump_test.dat``.

        Returns:
            The tuple ``(test_X, test_Y)``.  When the given path is not a
            file, a message is printed and the current (unchanged) values
            are returned.
        """

        if input_data_file == '':
            input_data_file = self._default_dataset_path("decision_stump_test.dat")
        elif not os.path.isfile(input_data_file):
            print("Please make sure input_data_file path is correct.")
            return self.test_X, self.test_Y

        self.test_X, self.test_Y = utility.DatasetLoader.load(input_data_file)

        # For these two modes the first column is dropped before transforming
        # (presumably the transform adds its own constant column -- TODO confirm).
        if self.feature_transform_mode in ('polynomial', 'legendre'):
            self.test_X = self.test_X[:, 1:]

        self.test_X = utility.DatasetLoader.feature_transform(
            self.test_X,
            self.feature_transform_mode,
            self.feature_transform_degree
        )

        return self.test_X, self.test_Y

    def set_param(self, run_t):
        """Set the number of boosting rounds and return it."""

        self.run_t = run_t

        return self.run_t

    def init_W(self, mode='normal'):
        """Size the model from the loaded training data.

        Must be called after ``load_train_data`` (or after a previous
        ``train``); otherwise a message is printed and nothing changes.

        Args:
            mode: accepted for interface compatibility; not used here.

        Returns:
            ``self.W``, a zero vector with one entry per feature column.
        """

        if self.status not in ('load_train_data', 'train'):
            print("Please load train data first.")
            return self.W

        self.status = 'init'

        self.data_num = len(self.train_Y)
        self.data_demension = len(self.train_X[0])
        self.weak_learner = [None] * self.run_t
        self.alpha = [0.0] * self.run_t
        self.W = np.zeros(self.data_demension)

        return self.W

    def score_function(self, x, W):
        """Return the sign of the alpha-weighted vote of all stumps for ``x``.

        Args:
            x: one sample; its first entry is ignored (see ``_format_features``).
            W: accepted for interface compatibility; not used here.

        Returns:
            ``np.sign`` of the weighted vote, i.e. 1.0, -1.0 or 0.0 on a tie.
        """

        # The text form of x does not depend on the stump, so build it once.
        predict_string = self._format_features(x)

        score = 0.0
        for alpha, weak_learner in zip(self.alpha, self.weak_learner):
            prediction = weak_learner.prediction(predict_string, 'future_data')
            score += alpha * prediction['prediction']

        return np.sign(score)

    def error_function(self, y_prediction, y_truth):
        """Return the 0/1 loss: 1 when the prediction differs from the truth."""

        return 1 if y_prediction != y_truth else 0

    def calculate_avg_error(self, X, Y, W):
        """Delegate to ``ml.Learner.calculate_avg_error``."""

        return super(BinaryClassifier, self).calculate_avg_error(X, Y, W)

    def calculate_test_data_avg_error(self):
        """Delegate to ``ml.Learner.calculate_test_data_avg_error``."""

        return super(BinaryClassifier, self).calculate_test_data_avg_error()

    def calculate_alpha_u(self, weak_learner, u):
        """Compute a trained stump's vote weight and the next sample weights.

        With ``epsilon`` the ``u``-weighted error rate of the stump on its own
        training data and ``scale = sqrt((1 - epsilon) / epsilon)``:
        ``alpha = ln(scale)``, misclassified samples get ``u * scale`` and
        correctly classified samples get ``u / scale``.

        Args:
            weak_learner: trained stump exposing ``train_X``, ``train_Y`` and
                ``prediction(text, 'test_data')`` returning a mapping with the
                keys ``'prediction'`` and ``'input_data_y'``.
            u: current per-sample weights, one per training sample.

        Returns:
            The tuple ``(alpha, new_u)`` where ``new_u`` is a NumPy array.
        """

        data_num = len(weak_learner.train_Y)
        weights = np.asarray(u, dtype=float)[:data_num]

        # Evaluate every sample once; the mask drives both the error rate and
        # the weight update.
        misclassified = np.zeros(data_num, dtype=bool)
        samples = zip(weak_learner.train_X, weak_learner.train_Y)
        for i, (features, label) in enumerate(samples):
            predict_string = self._format_features(features) + ' ' + str(label)
            prediction = weak_learner.prediction(predict_string, 'test_data')
            misclassified[i] = (
                float(prediction['prediction']) != float(prediction['input_data_y'])
            )

        epsilon = np.sum(weights[misclassified]) / np.sum(u)

        # A perfect (epsilon == 0) or always-wrong (epsilon == 1) stump would
        # give an infinite/zero scale and poison every later round with
        # inf/NaN weights, so keep epsilon strictly inside (0, 1).
        tiny = np.finfo(float).eps
        epsilon = np.clip(epsilon, tiny, 1.0 - tiny)

        tune_alpha = np.sqrt((1.0 - epsilon) / epsilon)
        alpha = np.log(tune_alpha)

        new_u = np.where(misclassified, weights * tune_alpha, weights / tune_alpha)

        return alpha, new_u

    def train(self):
        """Run ``run_t`` boosting rounds over the loaded training data.

        Requires ``init_W`` to have been called; otherwise a message is
        printed and nothing is trained.

        Returns:
            ``self.W`` (unchanged by training; the learned model lives in
            ``self.weak_learner`` and ``self.alpha``).
        """

        if self.status != 'init':
            print("Please load train data and init W first.")
            return self.W

        self.status = 'train'

        # Re-size here as well so a set_param() call made after init_W cannot
        # leave stale None entries or overflow the lists.
        self.weak_learner = [None] * self.run_t
        self.alpha = [0.0] * self.run_t

        # Start from uniform sample weights.
        u = np.full(self.data_num, 1.0 / self.data_num)

        for t in range(self.run_t):

            print("Round "+str(t+1))

            # Feed the stump the in-memory training set directly instead of
            # loading a file, then pass the current sample weights.
            decision_stump_bc = decision_stump.BinaryClassifier()
            decision_stump_bc.status = 'load_train_data'
            decision_stump_bc.train_X = self.train_X
            decision_stump_bc.train_Y = self.train_Y
            decision_stump_bc.set_param(u)
            decision_stump_bc.init_W()
            decision_stump_bc.train()

            alpha, u = self.calculate_alpha_u(decision_stump_bc, u)

            self.weak_learner[t] = decision_stump_bc
            self.alpha[t] = alpha

        return self.W

    def prediction(self, input_data='', mode='test_data'):
        """Delegate to ``ml.Learner.prediction``."""

        return super(BinaryClassifier, self).prediction(input_data, mode)

FukuML/DecisionStump.py

+14-6
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ def __init__(self):
2727
self.sign = 1
2828
self.feature_index = 0
2929
self.theta = 0
30+
self.u = None
3031

3132
def load_train_data(self, input_data_file=''):
3233

@@ -79,9 +80,11 @@ def load_test_data(self, input_data_file=''):
7980

8081
return self.test_X, self.test_Y
8182

82-
def set_param(self):
83+
def set_param(self, u=None):
8384

84-
return
85+
self.u = u
86+
87+
return self.u
8588

8689
def init_W(self, mode='normal'):
8790

@@ -100,6 +103,9 @@ def init_W(self, mode='normal'):
100103
self.data_demension = len(self.train_X[0])
101104
self.W = np.zeros(self.data_demension)
102105

106+
if self.u is None:
107+
self.u = np.array([(1.0 / self.data_num)] * self.data_num)
108+
103109
return self.W
104110

105111
def score_function(self, x, W):
@@ -147,21 +153,23 @@ def train(self):
147153

148154
dim_X = self.train_X[:, i]
149155
dim_XY = np.transpose(np.array([dim_X, self.train_Y]))
150-
sort_dim_XY = dim_XY[np.argsort(dim_XY[:, 0])]
156+
sort_index = np.argsort(dim_XY[:, 0])
157+
sort_dim_XY = dim_XY[sort_index]
158+
sort_u = self.u[sort_index]
151159

152160
sort_dim_X = sort_dim_XY[:, 0]
153161
sort_dim_Y = sort_dim_XY[:, 1]
154162

155163
thetas = np.array([float("-inf")] + [(sort_dim_X[j] + sort_dim_X[j+1])/2 for j in range(0, self.data_num-1)] + [float("inf")])
156-
error_in_i = self.data_num/self.data_num
164+
error_in_i = sum(sort_u)
157165
sign_i = 1
158166
theta_i = 0.0
159167

160168
for theta in thetas:
161169
y_positive = np.where(sort_dim_X > theta, 1, -1)
162170
y_negative = np.where(sort_dim_X < theta, 1, -1)
163-
error_positive = sum(y_positive != sort_dim_Y)/self.data_num
164-
error_negative = sum(y_negative != sort_dim_Y)/self.data_num
171+
error_positive = sum((y_positive != sort_dim_Y)*sort_u)
172+
error_negative = sum((y_negative != sort_dim_Y)*sort_u)
165173
if error_positive > error_negative:
166174
if error_in_i > error_negative:
167175
error_in_i = error_negative

0 commit comments

Comments
 (0)