
Commit 623d2f2

change weightBoard to numpy format

1 parent: ab8fe5a

File tree: 1 file changed (+35, −190)

AI.py

Lines changed: 35 additions & 190 deletions
@@ -1,8 +1,8 @@
 ## 2048 game AI
-
 import random
 import copy
 import time
+import numpy as np
 from board import Board
 
 
@@ -39,7 +39,7 @@ def weightBoard1(size):
                 for col in range(size):
                     exp = board[row][col]
                     board[row][col] = 4**exp
-            return board
+            return np.array(board)
 
         self.weightBoard = weightBoard1(self.size)
 
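For reference, a minimal sketch of what the converted helper now hands back: a NumPy array of 4**exponent weights instead of a nested Python list. The exponent layout below is made up purely for illustration; the real layout is built in the unchanged part of weightBoard1.

import numpy as np

# Illustrative exponent layout only -- not the layout AI.py actually builds.
exponents = [
    [15, 14, 13, 12],
    [8, 9, 10, 11],
    [7, 6, 5, 4],
    [0, 1, 2, 3],
]
weight_board = np.array([[4 ** e for e in row] for row in exponents])
print(weight_board.shape)   # (4, 4)
print(weight_board[0, 0])   # 1073741824, i.e. 4**15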
@@ -148,11 +148,7 @@ def getMaxMove1(self):
     """
 
     def evaluate(self):
-        score = 0
-        for row in range(self.size):
-            for col in range(self.size):
-                score += self.weightBoard[row][col] * self.GameBoard.board[row][col]
-        return score
+        return np.multiply(self.weightBoard, self.GameBoard.board).sum()
 
     # Advanced Beginner AI: greedy search based on game board scores with weights
     def getMaxMove2(self):
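The rewritten evaluate() should produce exactly what the old double loop did: an element-wise product of the weight board and the game board, summed over all cells. A quick equivalence check with toy values (both matrices below are illustrative, not data from the game):

import numpy as np

weights = np.array([[4, 16], [64, 256]])  # toy weight board
board = [[2, 0], [4, 8]]                  # toy game board (plain nested list)

# old, loop-based evaluation
loop_score = 0
for row in range(2):
    for col in range(2):
        loop_score += weights[row][col] * board[row][col]

# new, vectorized evaluation; np.multiply converts the nested list itself
vector_score = np.multiply(weights, board).sum()

assert loop_score == vector_score  # 8 + 0 + 256 + 2048 = 2312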
@@ -183,156 +179,6 @@ def getMaxMove2(self):
         self.performAction(bestAction)
 
     ### ExpectiMax & miniMax algorithm.
-    # Reference: http://cs229.stanford.edu/proj2016/report/NieHouAn-AIPlays2048-report.pdf
-
-    """
-    2021.12.22 update: we should consider the number of merges for each move and the
-    number of empty tiles after each move of the player to be as many as possible in favor of the next move
-    Reference: https://stackoverflow.com/questions/22342854/what-is-the-optimal-algorithm-for-the-game-2048/22389702#22389702
-    """
-
-    """
-    ## ExpectiMax
-
-    # player's move
-    def expectiMaxieMove(self, depth, importance):
-        if not depth: return (self.evaluate(), None) # depth = 0
-
-        # get all legal actions and preserve the board
-        originalScore = self.GameBoard.score
-
-        actions = self.getLegalMoves()
-
-        if not actions: return (self.evaluate(), None) # no legal actions
-
-        (bestScore, bestAction) = (-float('inf'), None)
-
-        for action in actions:
-            beforeMoveBoard = copy.deepcopy(self.GameBoard.board)
-            self.performAction(action)
-            computerScore = self.expectiMinnieScore(depth, importance)
-            self.GameBoard.board = beforeMoveBoard
-            self.GameBoard.score = originalScore
-
-            if computerScore > bestScore:
-                bestScore = computerScore
-                bestAction = action
-
-        return (bestScore, bestAction)
-
-    # computer's move
-    def expectiMinnieScore(self, depth, importance):
-        if not depth: return (self.evaluate(), None) # depth = 0
-
-        originalScore = self.GameBoard.score
-
-        # even though the real computer will put the new numbers randomly,
-        # we still assume that it can put 2 or 4 on any empty tile as it
-        # wishes to make the board harder for player to solve.
-
-        importantIndices = self.getImporantIndices(importance)
-        emptyTiles = [] # tuple list => empty tile coordinates
-        for i in range(self.size):
-            for j in range(self.size):
-                # this tile is empty and is "important"
-                if not self.GameBoard.board[i][j] and (i, j) in importantIndices:
-                    emptyTiles.append((i, j))
-        actions = []
-        for index in emptyTiles:
-            # can add 2 or 4 on any empty tile
-            actions.append((index, 2))
-            actions.append((index, 4))
-
-        if not actions: return self.evaluate() # no legal actions
-
-        expectedScore = 0
-
-        for action in actions:
-            addNum = action[1]
-            if addNum == 2: prob = 0.8
-            elif addNum == 4: prob = 0.2
-            else: assert(False) # should not reach here
-
-            beforeMoveBoard = copy.deepcopy(self.GameBoard.board)
-            self.addNewNum(action) # perform computer's action
-            (playerScore, _) = self.expectiMaxieMove(depth-1, importance)
-            self.GameBoard.board = beforeMoveBoard
-            self.GameBoard.score = originalScore
-
-            expectedScore += playerScore * prob
-
-        return expectedScore
-
-    ## Minimax with alpha-beta pruning
-    # player's move with alpha-beta pruning
-    def maxieMoveAlphaBeta(self, depth, alpha, beta):
-        assert(alpha < beta)
-        if not depth: return (self.evaluate(), None) # depth = 0
-
-        # get all legal actions and preserve the board
-        originalScore = self.GameBoard.score
-        actions = self.getLegalMoves()
-
-        if not actions: return (self.evaluate(), None) # no legal actions
-
-        (bestScore, bestAction) = (-float('inf'), None)
-
-        for action in actions:
-            beforeMoveBoard = copy.deepcopy(self.GameBoard.board)
-            self.performAction(action)
-            (computerScore, computerAction) = self.minnieMoveAlphaBeta(depth-1, alpha, beta)
-            self.GameBoard.board = beforeMoveBoard
-            self.GameBoard.score = originalScore
-
-            if computerScore > bestScore:
-                bestScore = computerScore
-                bestAction = action
-            alpha = max(alpha, bestScore)
-            if (alpha >= beta): break
-
-        return (bestScore, bestAction)
-
-    # computer's move with alpha-beta pruning
-    def minnieMoveAlphaBeta(self, depth, alpha, beta):
-        assert(alpha < beta)
-        if not depth: return (self.evaluate(), None) # depth = 0
-
-        originalScore = self.GameBoard.score
-        # even though the real computer will put the new numbers randomly,
-        # we still assume that it can put 2 or 4 on any empty tile as it
-        # wishes to make the board harder for player to solve.
-        emptyTiles = [] # tuple list => empty tile coordinates
-        for i in range(self.size):
-            for j in range(self.size):
-                if not self.GameBoard.board[i][j]: # this tile is empty
-                    emptyTiles.append((i, j))
-
-        actions = []
-        for index in emptyTiles:
-            # can add 2 or 4 on any empty tile
-            actions.append((index, 2))
-            actions.append((index, 4))
-
-        if not actions: return (self.evaluate(), None) # no legal actions
-
-        (bestScore, bestAction) = (float('inf'), None)
-
-        for action in actions:
-            beforeMoveBoard = copy.deepcopy(self.GameBoard.board)
-            self.addNewNum(action) # perform computer's action
-            (playerScore, playerAction) = self.maxieMoveAlphaBeta(depth-1, alpha, beta)
-            self.GameBoard.board = beforeMoveBoard
-            self.GameBoard.score = originalScore
-
-            if playerScore < bestScore:
-                bestScore = playerScore
-                bestAction = action
-            beta = min(beta, bestScore)
-            if (alpha >= beta): break
-
-        return (bestScore, bestAction)
-    """
-
     """
     Importance pruning: only take the computer's actions that affect the player's next move most negatively based on the weight of the empty tiles on the board.
     Reference : http://cs229.stanford.edu/proj2016/report/NieHouAn-AIPlays2048-report.pdf
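As a rough sketch of the importance-pruning idea described above: keep only the few empty cells carrying the largest weights, so the chance node only branches on the placements most damaging to the player. The helper name and the k parameter below are illustrative; this is not the repository's getImporantIndices.

import numpy as np

def important_empty_cells(board, weight_board, k):
    # return the k empty coordinates with the largest weights (sketch only)
    board = np.asarray(board)
    empties = list(zip(*np.nonzero(board == 0)))
    empties.sort(key=lambda rc: weight_board[rc], reverse=True)
    return empties[:k]

# e.g., on an AI instance: important_empty_cells(self.GameBoard.board, self.weightBoard, k=4)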
@@ -459,11 +305,10 @@ def getMaxMove3(self):
     """
 
     def evaluate2(self):
-        score = 0
-        for row in range(self.size):
-            for col in range(self.size):
-                score += self.weightBoard[row][col] * self.GameBoard.board[row][col]
-        return self.GameBoard.score * score
+        return (
+            self.GameBoard.score
+            * np.multiply(self.weightBoard, self.GameBoard.board).sum()
+        )
 
     def getMaxMove4(self):
         score, action = self.maxieMoveAlphaBetaImportance(
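evaluate2 is called at every leaf of the search, so the practical question is whether the vectorized form actually beats the Python double loop on a board this small; NumPy's per-call overhead can dominate on a 4x4 array. A micro-benchmark sketch for checking that on your own machine (not part of the repository; timings will vary):

import timeit
import numpy as np

weights = np.array([[4 ** (r * 4 + c) for c in range(4)] for r in range(4)])
board = [[2 * ((r + c) % 4) for c in range(4)] for r in range(4)]

def loop_eval():
    score = 0
    for row in range(4):
        for col in range(4):
            score += weights[row][col] * board[row][col]
    return score

def numpy_eval():
    return np.multiply(weights, board).sum()

print("loop :", timeit.timeit(loop_eval, number=10000))
print("numpy:", timeit.timeit(numpy_eval, number=10000))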
@@ -542,45 +387,21 @@ def playTheGame(self):
 # print("winrate: ", winrate)
 # print("--- %s seconds ---" % (time.time()-startTime))
 
-# competent AI plays 20 times
-startTime = time.time()
-winLose, record, scores = [], [], []
-for i in range(20):
-    currTrialStartTime = time.time()
-    testBoard = Board(4)
-    competentAI = AI(testBoard, 2)
-    res = competentAI.playTheGame()
-    winLose.append(res[0])
-    record.append(res[1])
-    scores.append(res[2])
-    print(
-        "---Current trial time: %s seconds ---" % (time.time() - currTrialStartTime)
-    )
-print("Competent AI:")
-print("winLose: ", winLose)
-print("record:", record)
-print("scores:", scores)
-avgscore = sum(scores) / len(scores)
-print("average score: ", avgscore)
-winrate = sum(winLose) / len(record)
-print("winrate: ", winrate)
-print("---Total time: %s seconds ---" % (time.time() - startTime))
-
-# # proficient AI play 20 times
+# # competent AI plays 20 times
 # startTime = time.time()
 # winLose, record, scores = [], [], []
 # for i in range(20):
 #     currTrialStartTime = time.time()
 #     testBoard = Board(4)
-#     proficientAI = AI(testBoard, 3)
-#     res = proficientAI.playTheGame()
+#     competentAI = AI(testBoard, 2)
+#     res = competentAI.playTheGame()
 #     winLose.append(res[0])
 #     record.append(res[1])
 #     scores.append(res[2])
 #     print(
 #         "---Current trial time: %s seconds ---" % (time.time() - currTrialStartTime)
 #     )
-# print("Proficient AI:")
+# print("Competent AI:")
 # print("winLose: ", winLose)
 # print("record:", record)
 # print("scores:", scores)
@@ -589,3 +410,27 @@ def playTheGame(self):
 # winrate = sum(winLose) / len(record)
 # print("winrate: ", winrate)
 # print("---Total time: %s seconds ---" % (time.time() - startTime))
+
+# proficient AI play 20 times
+startTime = time.time()
+winLose, record, scores = [], [], []
+for i in range(20):
+    currTrialStartTime = time.time()
+    testBoard = Board(4)
+    proficientAI = AI(testBoard, 3)
+    res = proficientAI.playTheGame()
+    winLose.append(res[0])
+    record.append(res[1])
+    scores.append(res[2])
+    print(
+        "---Current trial time: %s seconds ---" % (time.time() - currTrialStartTime)
+    )
+print("Proficient AI:")
+print("winLose: ", winLose)
+print("record:", record)
+print("scores:", scores)
+avgscore = sum(scores) / len(scores)
+print("average score: ", avgscore)
+winrate = sum(winLose) / len(record)
+print("winrate: ", winrate)
+print("---Total time: %s seconds ---" % (time.time() - startTime))
