xiecong
diff --git a/README.md b/README.md
Lines changed: 3 additions & 1 deletion
diff --git a/adaboost.py b/adaboost.py
Lines changed: 42 additions & 36 deletions
diff --git a/bayesian_net.py b/bayesian_net.py
Lines changed: 66 additions & 56 deletions
@@ -10,9 +10,11 @@ Also see decision boundary visualization for implemented classifiers in decision
 Implemented algorithms:
 
 * Regression Models
-    * Ridge Regression
+    * Linear Regression
         * Matrix solver
         * SGD/Adam solver
+        * L1 regularization (Lasso)
+        * L2 regularization (Ridge)
     * Logistic Regression
         * Multi-class prediction
     * Factorization Machines
 
@@ -4,45 +4,51 @@
 
 
 class AdaBoost(object):
-	def __init__(self, esti_num=10):
-		self.esti_num = esti_num
-		self.estimators = []
-		self.alphas = []
-
-	def fit(self, x, y):
-		n_data = x.shape[0]
-		w = np.ones(x.shape[0]) / n_data
-		eps = 1e-16
-		prediction = np.zeros(n_data)
-		for i in range(self.esti_num):
-			self.estimators.append(DecisionTree(metric_type='Gini impurity', depth=2))
-			self.estimators[i].fit(x, y, w)
-			pred_i = self.estimators[i].predict(x)
-			error_i = (pred_i!=y).dot(w.T)
-			self.alphas.append(np.log((1.0-error_i)/(error_i+eps))/2)
-			w = w * np.exp(self.alphas[i]*(2*(pred_i!=y)-1))
-			w = w / w.sum()
-
-			prediction += pred_i * self.alphas[i]
-			print("Tree {} constructed, acc {}".format(i, (np.sign(prediction) == y).sum()/n_data))
-
-	def predict(self, x):
-		return sum(esti.predict(x) * alpha for esti, alpha in zip(self.estimators, self.alphas))
 
+    def __init__(self, esti_num=10):
+        self.esti_num = esti_num
+        self.estimators = []
+        self.alphas = []
+
+    def fit(self, x, y):
+        n_data = x.shape[0]
+        w = np.ones(x.shape[0]) / n_data
+        eps = 1e-16
+        prediction = np.zeros(n_data)
+        for i in range(self.esti_num):
+            self.estimators.append(DecisionTree(
+                metric_type='Gini impurity', depth=2))
+            self.estimators[i].fit(x, y, w)
+            pred_i = self.estimators[i].predict(x)
+            error_i = (pred_i != y).dot(w.T)
+            self.alphas.append(np.log((1.0 - error_i) / (error_i + eps)) / 2)
+            w = w * np.exp(self.alphas[i] * (2 * (pred_i != y) - 1))
+            w = w / w.sum()
+
+            prediction += pred_i * self.alphas[i]
+            print("Tree {} constructed, acc {}".format(
+                i, (np.sign(prediction) == y).sum() / n_data))
+
+    def predict(self, x):
+        return sum(esti.predict(x) * alpha for esti, alpha in zip(self.estimators, self.alphas))
 
-def main():
-	data = load_breast_cancer()
-	y = data.target*2-1
-	test_ratio = 0.2
-	test_split = np.random.uniform(0, 1, len(data.data))
-	train_x, test_x = data.data[test_split >= test_ratio], data.data[test_split < test_ratio]
-	train_y, test_y = y[test_split >= test_ratio], y[test_split < test_ratio]
 
-	adaboost = AdaBoost()
-	adaboost.fit(train_x, train_y)
-	print((np.sign(adaboost.predict(train_x))==train_y).sum()/train_x.shape[0])
-	print((np.sign(adaboost.predict(test_x))==test_y).sum()/test_x.shape[0])
+def main():
+    data = load_breast_cancer()
+    y = data.target * 2 - 1
+    test_ratio = 0.2
+    test_split = np.random.uniform(0, 1, len(data.data))
+    train_x, test_x = data.data[test_split >=
+                                test_ratio], data.data[test_split < test_ratio]
+    train_y, test_y = y[test_split >= test_ratio], y[test_split < test_ratio]
+
+    adaboost = AdaBoost()
+    adaboost.fit(train_x, train_y)
+    print((np.sign(adaboost.predict(train_x))
+           == train_y).sum() / train_x.shape[0])
+    print((np.sign(adaboost.predict(test_x))
+           == test_y).sum() / test_x.shape[0])
 
 
 if __name__ == "__main__":
-    main()
+    main()
@@ -5,71 +5,81 @@
 
 
 class BayesianNet(object):
-	def __init__(self, names, edges, tables=None):
-		self.n_nodes = len(names)
-		if tables is None: tables = [[0]] * self.n_nodes
-		self.nodes = [{'name': name, 'table': np.array(table)} for name, table in zip(names, tables)]
-		self.name2idx = {k: v for v, k in enumerate(names)}
-		self.graph = np.zeros((self.n_nodes, self.n_nodes))
-		for edge in edges:
-			self.graph[self.name2idx[edge[1]], self.name2idx[edge[0]]] = 1
-		self.binary = np.array([1 << self.n_nodes - 1 - i for i in range(self.n_nodes)])
 
-	def fit(self, data):
-		data_size = len(data)
-		for i, node in enumerate(self.nodes):
-			table = []
-			parents = self.graph[i]==1
-			marginal = data[:, parents]
-			index = np.zeros(data.shape[0])
-			if marginal.shape[1] > 0:
-				index = (marginal * self.binary[-marginal.shape[1]:]).sum(axis=1)
-			for j in range(2**parents.sum()):
-				table.append(data[(index == j), i].sum() / (index == j).sum())
-			node['table'] = np.array(table)
+    def __init__(self, names, edges, tables=None):
+        self.n_nodes = len(names)
+        if tables is None:
+            tables = [[0]] * self.n_nodes
+        self.nodes = [{'name': name, 'table': np.array(
+            table)} for name, table in zip(names, tables)]
+        self.name2idx = {k: v for v, k in enumerate(names)}
+        self.graph = np.zeros((self.n_nodes, self.n_nodes))
+        for edge in edges:
+            self.graph[self.name2idx[edge[1]], self.name2idx[edge[0]]] = 1
+        self.binary = np.array(
+            [1 << self.n_nodes - 1 - i for i in range(self.n_nodes)])
 
-	def joint_p(self, values):
-		p = 1
-		for i in range(self.n_nodes):
-			index = 0
-			parents = self.graph[i]==1
-			if parents.sum() > 0:
-				index = np.dot(values[parents], self.binary[-parents.sum():])
-			p *= (1 - values[i]) + (2 * values[i] - 1) * self.nodes[i]['table'][int(index)]
-		return p
+    def fit(self, data):
+        data_size = len(data)
+        for i, node in enumerate(self.nodes):
+            table = []
+            parents = self.graph[i] == 1
+            marginal = data[:, parents]
+            index = np.zeros(data.shape[0])
+            if marginal.shape[1] > 0:
+                index = (
+                    marginal * self.binary[-marginal.shape[1]:]).sum(axis=1)
+            for j in range(2**parents.sum()):
+                table.append(data[(index == j), i].sum() / (index == j).sum())
+            node['table'] = np.array(table)
 
-	def marginal_p(self, condition):
-		p = 0
-		values = -np.ones(self.n_nodes)
-		for v in condition:
-			values[self.name2idx[v[1]]] = int(v[0] != '~')
-		mask = np.arange(self.n_nodes)[(values==-1)]
-		n_unkowns = self.n_nodes - len(condition)
-		for i in range(2**n_unkowns):
-			values[mask] = np.array([int(x) for x in '{:0{size}b}'.format(i, size=n_unkowns)])
-			p += self.joint_p(values)
-		return p
+    def joint_p(self, values):
+        p = 1
+        for i in range(self.n_nodes):
+            index = 0
+            parents = self.graph[i] == 1
+            if parents.sum() > 0:
+                index = np.dot(values[parents], self.binary[-parents.sum():])
+            p *= (1 - values[i]) + (2 * values[i] - 1) * \
+                self.nodes[i]['table'][int(index)]
+        return p
+
+    def marginal_p(self, condition):
+        p = 0
+        values = -np.ones(self.n_nodes)
+        for v in condition:
+            values[self.name2idx[v[1]]] = int(v[0] != '~')
+        mask = np.arange(self.n_nodes)[(values == -1)]
+        n_unkowns = self.n_nodes - len(condition)
+        for i in range(2**n_unkowns):
+            values[mask] = np.array(
+                [int(x) for x in '{:0{size}b}'.format(i, size=n_unkowns)])
+            p += self.joint_p(values)
+        return p
+
+    def query(self, v, condition):
+        p_pos = self.marginal_p([f'+{v}'] + condition) / self.marginal_p(condition)
+        return [1 - p_pos, p_pos]
 
-	def query(self, v, condition):
-		p_pos = self.marginal_p([f'+{v}'] + condition) / self.marginal_p(condition)
-		return [1 - p_pos, p_pos]
 
 def get_asia_data(url):
-	return read_csv(url).apply(lambda x: x == 'yes').astype(int).values
+    return read_csv(url).apply(lambda x: x == 'yes').astype(int).values
 
 
 def main():
-	names = 'ATSLBEXD'
-	edges = ['AT', 'SL', 'SB', 'TE', 'LE', 'BD', 'EX', 'ED']
-	#tables = [[0.01], [0.01, 0.05], [0.5], [0.01, 0.1], [0.3, 0.6], [0, 1, 1, 1], [0.05, 0.98], [0.1, 0.7, 0.8, 0.9]]
-	bn = BayesianNet(list(names), edges)  # also can use predefined conditional tables
-	asia_url = 'http://www.ccd.pitt.edu/wiki/images/ASIA10k.csv'
-	bn.fit(get_asia_data(asia_url))
-	print(bn.nodes)
-	for condition in [[], ['+A', '~S'], ['+A', '~S', '~D', '+X']]:
-		for c in ['T', 'L', 'B', 'E']:
-			print('p({}|{})={}'.format(c, ','.join(condition), bn.query(c, condition)))
+    names = 'ATSLBEXD'
+    edges = ['AT', 'SL', 'SB', 'TE', 'LE', 'BD', 'EX', 'ED']
+    #tables = [[0.01], [0.01, 0.05], [0.5], [0.01, 0.1], [0.3, 0.6], [0, 1, 1, 1], [0.05, 0.98], [0.1, 0.7, 0.8, 0.9]]
+    # predefined conditional tables can also be passed via the `tables` argument
+    bn = BayesianNet(list(names), edges)
+    asia_url = 'http://www.ccd.pitt.edu/wiki/images/ASIA10k.csv'
+    bn.fit(get_asia_data(asia_url))
+    print(bn.nodes)
+    for condition in [[], ['+A', '~S'], ['+A', '~S', '~D', '+X']]:
+        for c in ['T', 'L', 'B', 'E']:
+            print('p({}|{})={}'.format(c, ','.join(
+                condition), bn.query(c, condition)))
 
 
 if __name__ == "__main__":
-	main()
+    main()