-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathminimax.py
131 lines (109 loc) · 4.75 KB
/
minimax.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import numpy as np
# implements Minimax for Tic Tac Toe / Gomoku
n_size = 3
n_connect = 3
def is_done(board):
for i in range(n_size * n_size):
x, y = i % n_size, i // n_size
x_end = x + n_connect
x_rev_end = x - n_connect
y_end = y + n_connect
if ( # -
x_end <= n_size and abs(board[y, x:x_end].sum()) == n_connect
) or ( # |
y_end <= n_size and abs(board[y:y_end, x].sum()) == n_connect
) or ( # \
x_end <= n_size and y_end <= n_size and abs(
board[range(y, y_end), range(x, x_end)].sum()) == n_connect
) or ( # /
x_rev_end >= -1 and y_end <= n_size and abs(
board[range(y, y_end), range(x, x_rev_end, -1)].sum()) == n_connect
):
return board[y, x]
return 0
def play(agents):
board = np.zeros(n_size * n_size).astype(int)
record = np.zeros(n_size * n_size)
winner = 0
n_moves = 0
for move in range(n_size * n_size):
n_moves += 1
player = move % 2 * 2 - 1
action_pos = agents[move % 2].act(board, player)
record[action_pos] = n_moves
board[action_pos] = player
winner = is_done(board.reshape((n_size, n_size)))
if abs(winner) == 1:
break
return record.reshape((n_size, n_size)), winner
def test(agents):
game_records = [0, 0, 0]
for i in range(100):
idx = [0, 1] # np.random.permutation([0, 1]).astype(int)
board, winner = play([agents[idx[0]], agents[idx[1]]])
game_records[-int(winner) * (2 * idx[0] - 1) + 1] += 1
return game_records
class RandomMove(object):
def act(self, board, player):
return np.random.choice(n_size * n_size, p=(1 - np.abs(board)) / (1 - abs(board)).sum())
class MiniMax(object):
def __init__(self, max_depth=4):
self.cache = {}
self.max_depth = max_depth
def heuristic(self, board, player):
# a. in [1, -1], b. score(player1) = -score(player2)
evals = [0, 0] # for player -1 1
for i in range(n_size * n_size):
if board[i] == 0:
continue
evals[(board[i] + 1 ) // 2] += \
(i % n_size < n_size - 1 and board[i] == board[i + 1]) + \
(i + n_size < board.shape[0] - 1 and board[i] == board[i + n_size]) + \
(i + n_size < board.shape[0] - 1 and i % n_size > 0 and board[i] == board[i + n_size - 1]) + \
(i + n_size < board.shape[0] - 1 and i % n_size <
n_size - 1 and board[i] == board[i + n_size + 1])
return (-evals[0] * player + evals[1] * player) / (evals[0] + evals[1] + 1)
def score(self, board, player, depth, alpha, beta):
board_str = ''.join([str(i) for i in board])
if board_str in self.cache: # cached before
return self.cache[board_str]
winner = is_done(board.reshape(n_size, n_size))
if np.abs(board).sum() == board.shape[0] or winner != 0: # game end
self.cache[board_str] = ([], winner * player)
return [], winner * player
if depth >= self.max_depth:
return [], self.heuristic(board, player)
# a value less than -1 so next step can pick a legal move
board_scores = np.ones(board.shape[0]) * -2
heuristics_used = np.zeros(board.shape[0])
for i in range(board.shape[0]):
if board[i] != 0:
continue
board[i] = player
board_scores[i] = -self.score(board, -player, depth + 1, alpha, beta)[1]
heuristics_used[i] = ''.join([str(i) for i in board]) not in self.cache
board[i] = 0
if(player == -1):
alpha = max(np.max(board_scores), alpha)
else:
beta = max(np.max(board_scores), beta)
# alpha beta pruning will reduce the # returned choice of winning moves
if alpha > -beta or (player == -1 and alpha == 1) or (player == 1 and beta == 1):
break
best_score = np.amax(board_scores)
best_moves = [i for i in range(board.shape[0]) if board_scores[
i] == best_score]
if heuristics_used.sum() == 0 or (best_score == 1 and heuristics_used[best_moves].sum() == 0):
self.cache[board_str] = best_moves, best_score
return best_moves, best_score
def act(self, board, player):
return np.random.choice(self.score(board, player, 0, -2, -2)[0])
def main():
minimax = MiniMax()
random = RandomMove()
print('\t\t\t\twin/draw/lose')
print('minimax vs. minimax', test([minimax, minimax]))
print('random vs. minimax', test([random, minimax]))
print('minimax vs. random', test([minimax, random]))
if __name__ == "__main__":
main()