add som and update minimax

xiecong · xiecong · commit 5ad33a61a776 · 2022-01-13T21:51:29.000+08:00
diff --git a/README.md b/README.md
@@ -114,12 +114,11 @@
 
 #### Instance-based Learning and Kernel Machines
 * k-Nearest Neighbors (kNN) `k_nearest_neighbors.py`
-* Learning Vector Quantization (WIP)
+* Learning Vector Quantization
 * Support Vector Machine (SVM) `support_vector_machine.py`
     * Soft boundary
     * SMO algorithm
     * Different heuristics for selecting pairs in SMO
-* Radial Basis Function Network (WIP)
 
 #### Swarm Intelligence
 * Evolutionary Algorithm (EA) `evolutionary_algorithm.py`
@@ -134,6 +133,7 @@
 * Monte Carlo tree search `monte_carlo_tree_search.py`
     * Upper Confidence Bound 1 applied to trees (UCT)
 * Minimax `minimax.py`
+    * Alpha-Beta Pruning
 
 #### Reinforcement Learning
 * Temporal difference learning `temporal_difference.py`
@@ -142,10 +142,11 @@
     * CNN Target & Policy Net
     * Epsilon-Greedy
 
-#### TODO - Unsupervised Learning
-* Clustering
-    * k-Means / dbscan / spectrum / hierachical / SOM
-* Dimension Reduction
-    * Principal Component Analysis / Linear Discriminant Analysis / mds / tsne
+#### Unsupervised Learning
+* Clustering (WIP)
+    * k-Means / DBSCAN / spectral / hierarchical
+* Dimension Reduction (WIP)
+    * SOM
+    * Principal Component Analysis / Linear Discriminant Analysis / MDS / t-SNE
 
 Feel free to use the code. Please contact me if you have any question: xiecng [at] gmail.com
diff --git a/deep_belief_network.py b/deep_belief_network.py
@@ -4,6 +4,7 @@
 from restricted_boltzmann_machine import RBM
 
 
+# This implementation reuses the MLP training code for back propagation
 class DBN(object):
 
     def __init__(self, layers, n_labels):
diff --git a/minimax.py b/minimax.py
@@ -80,7 +80,7 @@ def heuristic(self, board, player):
                  n_size - 1 and board[i] == board[i + n_size + 1])
         return (-evals[0] * player + evals[1] * player) / (evals[0] + evals[1] + 1)
 
-    def score(self, board, player, depth):
+    def score(self, board, player, depth, alpha, beta):
         board_str = ''.join([str(i) for i in board])
         if board_str in self.cache:  # cached before
             return self.cache[board_str]
@@ -97,9 +97,16 @@ def score(self, board, player, depth):
             if board[i] != 0:
                 continue
             board[i] = player
-            board_scores[i] = -self.score(board, -player, depth + 1)[1]
+            board_scores[i] = -self.score(board, -player, depth + 1, alpha, beta)[1]
             heuristics_used[i] = ''.join([str(i) for i in board]) not in self.cache
             board[i] = 0
+            if(player == -1):
+                alpha = max(np.max(board_scores), alpha)
+            else:
+                beta = max(np.max(board_scores), beta)
+            # alpha beta pruning will reduce the # returned choice of winning moves
+            if alpha > -beta or (player == -1 and alpha == 1) or (player == 1 and beta == 1):
+                break
         best_score = np.amax(board_scores)
         best_moves = [i for i in range(board.shape[0]) if board_scores[
             i] == best_score]
@@ -108,7 +115,7 @@ def score(self, board, player, depth):
         return best_moves, best_score
 
     def act(self, board, player):
-        return np.random.choice(self.score(board, player, 0)[0])
+        return np.random.choice(self.score(board, player, 0, -2, -2)[0])
 
 
 def main():
diff --git a/self_organizing_map.py b/self_organizing_map.py
@@ -0,0 +1,81 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+class SOM(object):
+    """Self-organizing map on a square n_size x n_size grid.
+
+    Every sample pulls its best matching unit (BMU) and the grid cells
+    around it towards itself, weighted by a Gaussian of the grid offset.
+    """
+
+    def __init__(self):
+        self.sigma = 1  # initial width of the Gaussian neighbourhood
+        self.lr = 0.1  # initial learning rate
+        self.eps = 0.05  # decay coefficient applied to lr and sigma^2
+        self.n_size = 10  # side length of the grid
+        self.iterations = 10  # number of passes over the data
+        # grid offsets (di, dj) that lie within `radius` of a cell
+        radius = 4
+        self.neighbors_radius = [
+            (i, j)
+            for i in range(-radius, radius + 1)
+            for j in range(-radius, radius + 1)
+            if i * i + j * j <= radius * radius
+        ]
+        self.w = None
+
+    def get_bmu(self, w, x):
+        """Return the [row, col] index of the unit in w closest to x."""
+        dist = np.square(w - x).sum(axis=2)
+        # argmin indexes the flattened grid; map it back to 2-D
+        index = np.argmin(dist)
+        return np.array([index // self.n_size, index % self.n_size])
+
+    def fit(self, x):
+        """Train the map on x (one sample per row) and return the weights.
+
+        The weights are drawn after every pass and the figure is shown
+        once training has finished.
+        """
+        n_cols = 5
+        n_rows = max(1, -(-self.iterations // n_cols))  # ceiling division
+        # squeeze=False keeps ax two-dimensional even for a single row
+        _, ax = plt.subplots(
+            nrows=n_rows, ncols=n_cols, squeeze=False,
+            subplot_kw=dict(xticks=[], yticks=[]))
+
+        self.w = np.random.randn(self.n_size, self.n_size, x.shape[1])
+        # decay local copies so self.lr keeps its initial value and
+        # fit() can be called more than once
+        lr = self.lr
+        sigma_sq = self.sigma * self.sigma
+        for step in range(self.iterations):
+            for y in np.random.permutation(x):
+                i, j = self.get_bmu(self.w, y)
+                for di, dj in self.neighbors_radius:
+                    ni, nj = i + di, j + dj
+                    # skip offsets that leave the grid; a negative index
+                    # would otherwise wrap around to the opposite edge
+                    if not (0 <= ni < self.n_size and 0 <= nj < self.n_size):
+                        continue
+                    influence = np.exp(-(di * di + dj * dj) / 2 / sigma_sq)
+                    self.w[ni, nj] += lr * influence * (y - self.w[ni, nj])
+            lr *= np.exp(-step * self.eps)
+            sigma_sq *= np.exp(-step * self.eps)
+            axis = ax[step // n_cols][step % n_cols]
+            axis.imshow(self.w.astype(int))
+            axis.title.set_text(step)
+        plt.show()
+        return self.w
+
+
+def main():
+    som = SOM()
+    # random RGB colours; the upper bound of randint is exclusive
+    x = np.random.randint(0, 256, (3000, 3))
+    som.fit(x)
+
+
+if __name__ == "__main__":
+    main()