1
1
## 2048 game AI
2
-
3
2
import random
4
3
import copy
5
4
import time
5
+ import numpy as np
6
6
from board import Board
7
7
8
8
@@ -39,7 +39,7 @@ def weightBoard1(size):
39
39
for col in range (size ):
40
40
exp = board [row ][col ]
41
41
board [row ][col ] = 4 ** exp
42
- return board
42
+ return np . array ( board )
43
43
44
44
self .weightBoard = weightBoard1 (self .size )
45
45
@@ -148,11 +148,7 @@ def getMaxMove1(self):
148
148
"""
149
149
150
150
def evaluate(self):
    """Score the current position as the weighted sum of its tiles.

    Every cell of ``self.GameBoard.board`` is multiplied by the cell at the
    same position in ``self.weightBoard`` and the products are summed.

    Returns:
        The sum of the element-wise products (a NumPy scalar).
    """
    # NOTE(review): NumPy integer arithmetic wraps silently on overflow,
    # unlike the pure-Python loop this replaced -- confirm that the largest
    # weight times the largest tile fits the array dtype.
    return np.multiply(self.weightBoard, self.GameBoard.board).sum()
156
152
157
153
# Advanced Beginner AI: greedy search based on game board scores with weights
158
154
def getMaxMove2 (self ):
@@ -183,156 +179,6 @@ def getMaxMove2(self):
183
179
self .performAction (bestAction )
184
180
185
181
### ExpectiMax & miniMax algorithm.
186
- # Reference: http://cs229.stanford.edu/proj2016/report/NieHouAn-AIPlays2048-report.pdf
187
-
188
- """
189
- 2021.12.22 update: we should consider the number of merges for each move and the
190
- number of empty tiles after each move of the player to be as many as possible in favor of the next move
191
- Reference: https://stackoverflow.com/questions/22342854/what-is-the-optimal-algorithm-for-the-game-2048/22389702#22389702
192
- """
193
-
194
- """
195
- ## ExpectiMax
196
-
197
- # player's move
198
- def expectiMaxieMove(self, depth, importance):
199
- if not depth: return (self.evaluate(), None) # depth = 0
200
-
201
- # get all legal actions and preserve the board
202
- originalScore = self.GameBoard.score
203
-
204
- actions = self.getLegalMoves()
205
-
206
- if not actions: return (self.evaluate(), None) # no legal actions
207
-
208
- (bestScore, bestAction) = (-float('inf'), None)
209
-
210
- for action in actions:
211
- beforeMoveBoard = copy.deepcopy(self.GameBoard.board)
212
- self.performAction(action)
213
- computerScore = self.expectiMinnieScore(depth, importance)
214
- self.GameBoard.board = beforeMoveBoard
215
- self.GameBoard.score = originalScore
216
-
217
- if computerScore > bestScore:
218
- bestScore = computerScore
219
- bestAction = action
220
-
221
- return (bestScore, bestAction)
222
-
223
- # computer's move
224
- def expectiMinnieScore(self, depth, importance):
225
- if not depth: return (self.evaluate(), None) # depth = 0
226
-
227
- originalScore = self.GameBoard.score
228
-
229
- # even though the real computer will put the new numbers randomly,
230
- # we still assume that it can put 2 or 4 on any empty tile as it
231
- # wishes to make the board harder for player to solve.
232
-
233
- importantIndices = self.getImporantIndices(importance)
234
- emptyTiles = [] # tuple list => empty tile coordinates
235
- for i in range(self.size):
236
- for j in range(self.size):
237
- # this tile is empty and is "important"
238
- if not self.GameBoard.board[i][j] and (i, j) in importantIndices:
239
- emptyTiles.append((i, j))
240
- actions = []
241
- for index in emptyTiles:
242
- # can add 2 or 4 on any empty tile
243
- actions.append((index, 2))
244
- actions.append((index, 4))
245
-
246
- if not actions: return self.evaluate() # no legal actions
247
-
248
- expectedScore = 0
249
-
250
- for action in actions:
251
- addNum = action[1]
252
- if addNum == 2: prob = 0.8
253
- elif addNum == 4: prob = 0.2
254
- else: assert(False) # should not reach here
255
-
256
- beforeMoveBoard = copy.deepcopy(self.GameBoard.board)
257
- self.addNewNum(action) # perform computer's action
258
- (playerScore, _) = self.expectiMaxieMove(depth-1, importance)
259
- self.GameBoard.board = beforeMoveBoard
260
- self.GameBoard.score = originalScore
261
-
262
- expectedScore += playerScore * prob
263
-
264
- return expectedScore
265
-
266
- ## Minimax with alpha-beta pruning
267
- # player's move with alpha-beta pruning
268
- def maxieMoveAlphaBeta(self, depth, alpha, beta):
269
- assert(alpha < beta)
270
- if not depth: return (self.evaluate(), None) # depth = 0
271
-
272
- # get all legal actions and preserve the board
273
- originalScore = self.GameBoard.score
274
- actions = self.getLegalMoves()
275
-
276
- if not actions: return (self.evaluate(), None) # no legal actions
277
-
278
- (bestScore, bestAction) = (-float('inf'), None)
279
-
280
- for action in actions:
281
- beforeMoveBoard = copy.deepcopy(self.GameBoard.board)
282
- self.performAction(action)
283
- (computerScore, computerAction) = self.minnieMoveAlphaBeta(depth-1, alpha, beta)
284
- self.GameBoard.board = beforeMoveBoard
285
- self.GameBoard.score = originalScore
286
-
287
- if computerScore > bestScore:
288
- bestScore = computerScore
289
- bestAction = action
290
- alpha = max(alpha, bestScore)
291
- if (alpha >= beta): break
292
-
293
- return (bestScore, bestAction)
294
-
295
- # computer's move with alpha-beta pruning
296
- def minnieMoveAlphaBeta(self, depth, alpha, beta):
297
- assert(alpha < beta)
298
- if not depth: return (self.evaluate(), None) # depth = 0
299
-
300
- originalScore = self.GameBoard.score
301
- # even though the real computer will put the new numbers randomly,
302
- # we still assume that it can put 2 or 4 on any empty tile as it
303
- # wishes to make the board harder for player to solve.
304
- emptyTiles = [] # tuple list => empty tile coordinates
305
- for i in range(self.size):
306
- for j in range(self.size):
307
- if not self.GameBoard.board[i][j]: # this tile is empty
308
- emptyTiles.append((i, j))
309
-
310
- actions = []
311
- for index in emptyTiles:
312
- # can add 2 or 4 on any empty tile
313
- actions.append((index, 2))
314
- actions.append((index, 4))
315
-
316
- if not actions: return (self.evaluate(), None) # no legal actions
317
-
318
- (bestScore, bestAction) = (float('inf'), None)
319
-
320
- for action in actions:
321
- beforeMoveBoard = copy.deepcopy(self.GameBoard.board)
322
- self.addNewNum(action) # perform computer's action
323
- (playerScore, playerAction) = self.maxieMoveAlphaBeta(depth-1, alpha, beta)
324
- self.GameBoard.board = beforeMoveBoard
325
- self.GameBoard.score = originalScore
326
-
327
- if playerScore < bestScore:
328
- bestScore = playerScore
329
- bestAction = action
330
- beta = min(beta, bestScore)
331
- if (alpha >= beta) : break
332
-
333
- return (bestScore, bestAction)
334
- """
335
-
336
182
"""
337
183
Importance pruning: consider only the computer's placements that hurt the player's next move the most, judged by the weights of the empty tiles on the board.
338
184
Reference : http://cs229.stanford.edu/proj2016/report/NieHouAn-AIPlays2048-report.pdf
@@ -459,11 +305,10 @@ def getMaxMove3(self):
459
305
"""
460
306
461
307
def evaluate2(self):
    """Score the position as the game score times the weighted tile sum.

    The weighted tile sum is the total of the element-wise products of
    ``self.weightBoard`` and ``self.GameBoard.board``; it is then scaled by
    ``self.GameBoard.score``.

    Returns:
        ``self.GameBoard.score`` multiplied by the weighted tile sum.
    """
    # NOTE(review): NumPy integer arithmetic wraps silently on overflow,
    # unlike the pure-Python loop this replaced -- confirm the value range.
    weighted_sum = np.multiply(self.weightBoard, self.GameBoard.board).sum()
    return self.GameBoard.score * weighted_sum
467
312
468
313
def getMaxMove4 (self ):
469
314
score , action = self .maxieMoveAlphaBetaImportance (
@@ -542,45 +387,21 @@ def playTheGame(self):
542
387
# print("winrate: ", winrate)
543
388
# print("--- %s seconds ---" % (time.time()-startTime))
544
389
545
- # competent AI plays 20 times
546
- startTime = time .time ()
547
- winLose , record , scores = [], [], []
548
- for i in range (20 ):
549
- currTrialStartTime = time .time ()
550
- testBoard = Board (4 )
551
- competentAI = AI (testBoard , 2 )
552
- res = competentAI .playTheGame ()
553
- winLose .append (res [0 ])
554
- record .append (res [1 ])
555
- scores .append (res [2 ])
556
- print (
557
- "---Current trial time: %s seconds ---" % (time .time () - currTrialStartTime )
558
- )
559
- print ("Competent AI:" )
560
- print ("winLose: " , winLose )
561
- print ("record:" , record )
562
- print ("scores:" , scores )
563
- avgscore = sum (scores ) / len (scores )
564
- print ("average score: " , avgscore )
565
- winrate = sum (winLose ) / len (record )
566
- print ("winrate: " , winrate )
567
- print ("---Total time: %s seconds ---" % (time .time () - startTime ))
568
-
569
- # # proficient AI play 20 times
390
+ # # competent AI plays 20 times
570
391
# startTime = time.time()
571
392
# winLose, record, scores = [], [], []
572
393
# for i in range(20):
573
394
# currTrialStartTime = time.time()
574
395
# testBoard = Board(4)
575
- # proficientAI = AI(testBoard, 3 )
576
- # res = proficientAI .playTheGame()
396
+ # competentAI = AI(testBoard, 2 )
397
+ # res = competentAI .playTheGame()
577
398
# winLose.append(res[0])
578
399
# record.append(res[1])
579
400
# scores.append(res[2])
580
401
# print(
581
402
# "---Current trial time: %s seconds ---" % (time.time() - currTrialStartTime)
582
403
# )
583
- # print("Proficient AI:")
404
+ # print("Competent AI:")
584
405
# print("winLose: ", winLose)
585
406
# print("record:", record)
586
407
# print("scores:", scores)
@@ -589,3 +410,27 @@ def playTheGame(self):
589
410
# winrate = sum(winLose) / len(record)
590
411
# print("winrate: ", winrate)
591
412
# print("---Total time: %s seconds ---" % (time.time() - startTime))
413
+
414
# proficient AI plays 20 times
# Runs 20 independent games on fresh 4x4 boards, collects the three values
# returned by playTheGame() for each game, and prints per-trial and total
# timings followed by summary statistics.
startTime = time.time()
winLose, record, scores = [], [], []
for i in range(20):
    currTrialStartTime = time.time()
    testBoard = Board(4)
    # NOTE(review): level 3 appears to select the "proficient" strategy
    # (the commented-out "competent" run uses 2) -- confirm against AI.
    proficientAI = AI(testBoard, 3)
    res = proficientAI.playTheGame()
    # presumably res is (win flag, record, final score) -- verify against
    # playTheGame's return value.
    winLose.append(res[0])
    record.append(res[1])
    scores.append(res[2])
    print(
        "---Current trial time: %s seconds ---" % (time.time() - currTrialStartTime)
    )
print("Proficient AI:")
print("winLose: ", winLose)
print("record:", record)
print("scores:", scores)
avgscore = sum(scores) / len(scores)
print("average score: ", avgscore)
# record and winLose each gain one entry per trial, so len(record) is the
# number of games played.
winrate = sum(winLose) / len(record)
print("winrate: ", winrate)
print("---Total time: %s seconds ---" % (time.time() - startTime))
0 commit comments