-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtic-tac-toe-gp
574 lines (496 loc) · 21.8 KB
/
tic-tac-toe-gp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
; GP System implementation by Hammad Ahmad and Brandon Lefore.
; This system tries to evolve a population of players that play a 3-dimentional game of
; Tic Tac Toe.
; The major modifications to this system from the originial basic GP implementation, in
; addition to changing almost every function to implement the 3D Tic Tac Toe, are:
; Fitness Proportionate Selection and Koza 90/10 selection.
; The system runs for a certain number of maximum generations before outputting the best
; player evolved so far.
; Note that the output player is the best among the population, but it may not necessarily be
; good. This is because we run the evolution with a small population size and a relatively
; small number of generations (because of the very large number of computations that result
; in a long running time of the system). Theoretically, if the evolution was run for hours
; with a large initial population, the output player should be very good.
(ns tic-tac-toe.core)
(defn make-program-into-fn
"Takes a GP program represented as a list, and transforms it into a function that
can be called (with no inputs)."
[program]
(eval (list 'fn '[] program)))
(def line-separator
"-------------------------------------------------------------------------")
(defn get-vec-count
"A helper function for program-size. Returns the number of vectors (indexes) in a program."
[program]
(let [flat-prog (flatten program)]
; get the number of integers and return that value divided by 3,
; since each vector has 3 integers
(/ (count (filter integer? flat-prog)) 3)))
(defn program-size
"Finds the size of the program, i.e. number of nodes in its tree."
[prog]
(if (not (seq? prog))
1
(let [size (count (flatten prog))]
(- size (* (get-vec-count prog) 2)))))
(def game ; an atom that keeps track of the game state
(atom nil))
(def player ; an atom that keeps track of the current player making the turn
(atom nil))
(def indexes ; index values on the board -- this is the terminal set.
[[0 0 0] [0 0 1] [0 0 2] [0 1 0] [0 1 1] [0 1 2] [0 2 0] [0 2 1] [0 2 2]
[1 0 0] [1 0 1] [1 0 2] [1 1 0] [1 1 1] [1 1 2] [1 2 0] [1 2 1] [1 2 2]
[2 0 0] [2 0 1] [2 0 2] [2 1 0] [2 1 1] [2 1 2] [2 2 0] [2 2 1] [2 2 2]])
(defn select-terminal
"Selects a random terminal for the program -- the terminal is any index value."
[]
(rand-nth indexes))
(def all-winning-indexes [ ; all possible winning combinations for the game
; y-axis
[[0 0 0] [1 0 0] [2 0 0]]
[[0 1 0] [1 1 0] [2 1 0]]
[[0 2 0] [1 2 0] [2 2 0]]
[[0 0 1] [1 0 1] [2 0 1]]
[[0 1 1] [1 1 1] [2 1 1]]
[[0 2 1] [1 2 1] [2 2 1]]
[[0 0 2] [1 0 2] [2 0 2]]
[[0 1 2] [1 1 2] [2 1 2]]
[[0 2 2] [1 2 2] [2 2 2]]
; x-axis
[[0 0 0] [0 1 0] [0 2 0]]
[[1 0 0] [1 1 0] [1 2 0]]
[[2 0 0] [2 1 0] [2 2 0]]
[[0 0 1] [0 1 1] [0 2 1]]
[[1 0 1] [1 1 1] [1 2 1]]
[[2 0 1] [2 1 1] [2 2 1]]
[[0 0 2] [0 1 2] [0 2 2]]
[[1 0 2] [1 1 2] [1 2 2]]
[[2 0 2] [2 1 2] [2 2 2]]
; z-axis
[[0 0 0] [0 0 1] [0 0 2]]
[[1 0 0] [1 0 1] [1 0 2]]
[[2 0 0] [2 0 1] [2 0 2]]
[[0 1 0] [0 1 1] [0 1 2]]
[[1 1 0] [1 1 1] [1 1 2]]
[[2 1 0] [2 1 1] [2 1 2]]
[[0 2 0] [0 2 1] [0 2 2]]
[[1 2 0] [1 2 1] [1 2 2]]
[[2 2 0] [2 2 1] [2 2 2]]
; diagonals-xy
[[0 0 0] [1 1 0] [2 2 0]]
[[2 0 0] [1 1 0] [0 2 0]]
[[0 0 1] [1 1 1] [2 2 1]]
[[2 0 1] [1 1 1] [0 2 1]]
[[0 0 2] [1 1 2] [2 2 2]]
[[2 0 2] [1 1 2] [0 2 2]]
; diagonals-xz
[[0 0 0] [0 1 1] [0 2 2]]
[[0 0 2] [0 1 1] [0 2 0]]
[[1 0 0] [1 1 1] [1 2 2]]
[[1 0 2] [1 1 1] [1 2 0]]
[[2 0 0] [2 1 1] [2 2 2]]
[[2 0 2] [2 1 1] [2 2 0]]
; diagonals-yz
[[0 0 0] [1 0 1] [2 0 2]]
[[0 0 2] [1 0 1] [2 0 0]]
[[0 1 0] [1 1 1] [2 1 2]]
[[0 1 2] [1 1 1] [2 1 0]]
[[0 2 0] [1 2 1] [2 2 2]]
[[0 2 2] [1 2 1] [2 2 0]]
; diagonals-xyz
[[0 0 0] [1 1 1] [2 2 2]]
[[0 2 2] [1 1 1] [2 0 0]]
[[0 0 2] [1 1 1] [2 2 0]]
[[2 0 2] [1 1 1] [0 2 0]]
])
(defn create-game-board
"Creates the 3x3x3 Tic-Tac-Toe game board."
[]
[[[nil nil nil] [nil nil nil] [nil nil nil]]
[[nil nil nil] [nil nil nil] [nil nil nil]]
[[nil nil nil] [nil nil nil] [nil nil nil]]])
(def board ; an atom that keeps track of the game board
(atom (create-game-board)))
(defn get-value-at-index
"Gets the token value in the board at a particular index."
[index]
(get-in @board index))
(defn valid-spot?
"Determines whether or not the spot is empty (and hence, valid) for a player move."
[index]
(= nil (get-value-at-index index)))
(defn all-spots-taken?
"Determines whether all the spots on the board are occupied with tokens."
[]
; for the board to be fully occupied, the sequence returned by calling flatten on board
; should not have any nils in it.
(empty? (filter (fn [index] (= nil index)) (flatten @board))))
(defn choose-random-spot
"Returns an index representing a random, unoccupied spot on the board."
[]
(loop [index (rand-nth indexes)]
(if (= nil (get-value-at-index index))
index
(recur (rand-nth indexes))))) ; recurse until a valid spot is found
(def instruction-set ; a map of the function sets and the number of arguments to each function
'{if-mine-at-index 3
if-theirs-at-index 3
if-mine-at-indexes 4
if-theirs-at-indexes 4
if-index-empty 2})
(defn if-mine-at-index
"If player token is at index1, return index2, else return index3."
[index1 index2 index3]
(if (= (get @player :token) (get-value-at-index index1))
index2
index3))
(defn if-theirs-at-index
"If opponent token is at index1, return index2, else return index3."
[index1 index2 index3]
(if (and ; neither player token nor a nil is present at index1
; this means that the opponent token must be present at index1
(not (= (get @player :token) (get-value-at-index index1)))
(not (= nil (get-value-at-index index1))))
index2
index3))
(defn if-mine-at-indexes
"If player token is at index1 and index2, return index3, else return index4."
[index1 index2 index3 index4]
(if (and
(= (get @player :token) (get-value-at-index index1))
(= (get @player :token) (get-value-at-index index2)))
index3
index4))
(defn if-theirs-at-indexes
"If opponent token is at index1 and index2, return index3, else return index4."
[index1 index2 index3 index4]
(if (and
(and
(not (= (get @player :token) (get-value-at-index index1)))
(not (= nil (get-value-at-index index1))))
(and
(not (= (get @player :token) (get-value-at-index index2)))
(not (= nil (get-value-at-index index2)))))
index3
index4))
(defn if-index-empty
"If index1 is empty, return index1, else return index2."
[index1 index2]
(if (= nil (get-value-at-index index1))
index1
index2))
(def function-set ; a list of the functions that make up the function set
(list 'if-mine-at-index 'if-theirs-at-index 'if-mine-at-indexes 'if-theirs-at-indexes
'if-index-empty))
(defn select-fun
"Selects a random function for the program (from the function set)."
[]
(rand-nth function-set))
(defn generate-prog
"Generates a program of the given depth, using random functions and terminals."
[depth]
(if (zero? depth)
(select-terminal)
(let [func (select-fun)]
(conj ; select a random function and recursively call generate-prog n times to
; fill out the subchildren of the node, where n = number of arguments the function
; takes.
(repeatedly (get instruction-set func)
#(generate-prog (dec depth)))
func))))
(defn create-player
"Creates a Tic-Tac-Toe computer player.
Each player is represented as an atom so that the win and loss count of the players can
be updated after every game."
[char strategy]
(atom {:token char
:strategy strategy
:wins 0
:losses 0}))
(defn new-game
"Initializes a new Tic-Tac-Toe game."
[player1 player2]
(reset! board (create-game-board)) ; reset the atom that monitors the board state globally
{:board @board
:player1 player1
:player2 player2})
(defn get-other-player
"Resets the player atom to the player not making the turn (i.e., the other player)."
[]
(if (= player (get @game :player1))
(reset! player (get @game :player2))
(reset! player (get @game :player1))))
(defn take-turn
"Takes a turn by placing the player token on the board, and then changing the current
player variable."
[index the-player]
(if (valid-spot? index)
(reset! board (assoc-in @board index (get @the-player :token)))
; if the spot is already occupied, choose a random spot and place the token there
(reset! board (assoc-in @board (choose-random-spot) (get @the-player :token))))
(get-other-player) ; change/reset the current player variable
@board) ; return the board after taking the turn
; We got help from:
; https://github.com/trptcolin/tictactoe-clojure/blob/master/src/tictactoe/core.clj
; to get helper functions to check for a winner. Our version takes a basic idea from
; the cited source, but changes it to work with atoms and a 3D version of Tic Tac Toe.
(defn winner-exists?
"A helper function for winner. Determines whether or not a winner exists."
[indexes]
(apply = (map #(get-value-at-index %) indexes)))
(defn winner-on-indexes
"A helper function for winner. Returns the token of the winner if the winner exists on
the given index values."
[indexes]
(if (winner-exists? indexes)
(get-value-at-index (first indexes))
nil))
(defn winner
"Returns the winner of the game, or nil if there is no winner yet."
[]
(let [winning-token (some #(winner-on-indexes %) all-winning-indexes)]
; get the winning token by checking all winning combinations
(cond
(= winning-token (get @(get @game :player1) :token)) "Player 1"
(= winning-token (get @(get @game :player2) :token)) "Player 2"
:else nil)))
(defn game-over?
"Determines whether or not the game is over yet."
[]
(or
(all-spots-taken?)
(not (= nil (winner)))))
(defn update-wins-losses
"Updates the wins and losses counts for the two input players.
The winner is always the first argument, and the loser is the second."
[winner loser]
(reset! winner (assoc @winner :wins (inc (get @winner :wins))))
(reset! loser (assoc @loser :losses (inc (get @loser :losses)))))
(defn play-game
"Plays a game of 3D Tic Tac Toe."
[the-game]
(reset! game the-game) ; reset the atom that monitors the game state globally
(reset! player (get the-game :player1)) ; reset the player atom to player1
(loop [game-board @board
current (get the-game :player1)
other (get the-game :player2)]
(let [next-move-fn (make-program-into-fn
(get @current :strategy))] ; get player1's strategy function
(if (game-over?)
(let [winning-player (winner)
player1 (get the-game :player1)
player2 (get the-game :player2)]
(if (= winning-player "Player 1")
(update-wins-losses player1 player2) ; update the wins and losses counts for both
(update-wins-losses player2 player1)) ; players, depending on who won and who lost.
winning-player) ; return winner
(recur ; recurse through the function with:
(take-turn (next-move-fn) current) ; a board updated with the previous turn
other ; player1 and player2 alternating for the turns
current)))))
(defn play-against-each-other
"Makes a population of players play against each other."
[population]
(let [x-players (filter #(= (get @% :token) "X") population) ; separate all X-players...
o-players (filter #(= (get @% :token) "O") population)] ; and all O-players
(loop [first-player (rand-nth x-players)
second-player (rand-nth o-players)
; X and O players are chosen randomly. This does leave a possibility that some
; players may not get a chance to play any game at all (although this is not very
; likely), but it means that the element of randomness makes it possible for a
; particular individual to play individuals that have varied strategies (as
; opposed to playing similar individuals every time the function is called).
counter 0]
(if (< counter (* 10 (count population))) ; play 10x population count number of games
(do
(play-game (new-game first-player second-player))
(if (even? counter) ; alternate between:
(recur ; X-players going first, and
(rand-nth x-players)
(rand-nth o-players)
(inc counter))
(recur ; O-players going first.
(rand-nth o-players)
(rand-nth x-players)
(inc counter))))))))
(defn select-random-subtree
"Given a program, selects a random subtree and returns it."
([prog]
(select-random-subtree prog (rand-int (program-size prog))))
([prog subtree-index]
(cond
(not (seq? prog)) prog
(and (zero? subtree-index)
(some #{(first prog)} (keys instruction-set))) prog
(< subtree-index (program-size (first prog))) (recur (first prog)
subtree-index)
:else (recur (rest prog)
(- subtree-index (program-size (first prog)))))))
(defn replace-random-subtree
"Given a program and a replacement-subtree, replace a random node
in the program with the replacement-subtree."
([prog replacement-subtree]
(replace-random-subtree prog replacement-subtree (rand-int (program-size prog))))
([prog replacement-subtree subtree-index]
(cond
(not (seq? prog)) replacement-subtree
(zero? subtree-index) replacement-subtree
:else (map (fn [element start-index]
(if (<= start-index
subtree-index
(+ start-index -1 (program-size element)))
(replace-random-subtree element
replacement-subtree
(- subtree-index start-index))
element))
prog
(cons 0 (reductions + (map program-size prog)))))))
(defn select-node
"A helper function for koza-90-10.
Given a program, selects a random interior node (function) and returns it."
[prog]
(loop [node (select-random-subtree prog)]
; if the node is not a vector (i.e. index), return it
(if (not= (type node) clojure.lang.PersistentVector)
node
(recur (select-random-subtree prog)))))
(defn select-leaf
"A helper function for koza-90-10.
Given a program, selects a random leaf node (terminal) and returns it."
[prog]
(loop [node (select-random-subtree prog)]
; if the node is a vector (i.e. index), return it
(if (= (type node) clojure.lang.PersistentVector)
node
(recur (select-random-subtree prog)))))
(defn koza-90-10
"Selects a random subtree using the Koza 90/10 method.
There is a 90% chance that a function is returned, and a 10% chance that a terminal is
returned."
[prog]
(let [probability (rand)]
(if (< probability 0.9)
(select-node prog)
(select-leaf prog))))
(defn mutate
"Replaces a random node in the program by a random program of depth between 0 and
2 (inclusive)."
[prog]
(replace-random-subtree prog (generate-prog (rand-int 3))))
(defn crossover
"Replaces a random node in the program by a random node from another program.
Regular node selection is used 80% of the time, and Koza 90/10 is used 20% of the time."
[prog1 prog2]
(let [probability (rand)]
(if (< probability 0.8)
(replace-random-subtree prog1 (select-random-subtree prog2))
(replace-random-subtree prog1 (koza-90-10 prog2)))))
(defn eval-fitness
"Evaluates the fitness of an individual (the greater, the better).
A value of 0 indicates that the individual has not won any games yet.
A value of 1 indicates that the individual has won all the games it played."
[individual]
(let [wins (get @individual :wins)
losses (get @individual :losses)]
(if (= (+ wins losses) 0)
0
(/ wins (+ wins losses)))))
(defn pair-individuals-fitnesses
"A helper function for sort-population and fitness-proportionate-selection.
Returns a vector of [individual fitness] pairs."
[population]
(vec (map #(vector % (eval-fitness %)) population)))
(defn sort-population
"Sorts the population of players by descending order of fitness."
[population]
(let [individuals-fitnesses (pair-individuals-fitnesses population)]
(vec (map first ; get the individuals only (discards the fitness from [individual fitness])
(sort #(> (second %1) (second %2)) individuals-fitnesses)))))
(defn sort-population-by-x-and-o
"Sorts the population of players by Xs and Os."
[population]
(vec (concat
(filter #(= (get @% :token) "X") population)
(filter #(= (get @% :token) "O") population))))
(defn generate-pop
"Generates a population of players of the given population size.
Pre-requisite: pop-size should be an even number."
[pop-size]
(if (even? pop-size)
(vec (concat
(repeatedly (/ pop-size 2) #(create-player "X" (generate-prog 3)))
(repeatedly (/ pop-size 2) #(create-player "O" (generate-prog 3)))))
(println "Error: pop-size should be an even integer.")))
; We got help from:
; http://stackoverflow.com/questions/177271/roulette-selection-in-genetic-algorithms
; for a pseudocode algorithm for fitness proportionate selection, because we did not know how
; to deal with the probability equation discussed in class. We then implemented the
; selection method in Clojure after having referenced the above source.
(defn fitness-proportionate-selection
"Selects an individual from the population using fitness-proportionate selection."
([population]
(let [individuals-fitnesses (pair-individuals-fitnesses population)
; get the sum of all fitnesses of the population, and then get a random stop-at
; value to determine at what point to stop the selection loop
fitness-sum (reduce + (map second individuals-fitnesses))
stop-value (* (rand) fitness-sum)]
(loop [pairs individuals-fitnesses
counter 0] ; counter keeps track of how far we have got in the selection "wheel"
(let [individual (first (first pairs))
fitness (second (first pairs))]
; if the counter and the individual's fitness falls in the "stop region,"
; return the individual. otherwise, recur with the rest of the population and an
; incremented counter.
(if (> (+ counter fitness) stop-value)
individual
(recur (rest pairs) (+ counter fitness))))))))
(defn evolve
"Evolves the population of programs. The evolution runs for a certain number of maximum
generations before returning the current best individual."
[population-size max-generations mutation-ratio crossover-ratio]
(if (> (+ mutation-ratio crossover-ratio) 1)
(println "Error: The sum of mutation and crossover ratios should be <= 1.")
(loop [generation-number 0
population (sort-population-by-x-and-o (generate-pop population-size))]
(play-against-each-other population) ; make the individuals play against each other
(let [sorted-by-error-population (sort-population population)
best-individual (first sorted-by-error-population)
best-individual-fitness (eval-fitness best-individual)]
(println line-separator)
(println "Generation number:" generation-number)
(println "Best player so far:" @best-individual)
(println "Best fitness so far:" best-individual-fitness)
(cond
(= generation-number max-generations)
(println line-separator
"\nEvolution complete. \nBest player so far:\n"
@best-individual
"\nPlayer Fitness:" best-individual-fitness)
:else (recur
(inc generation-number)
(sort-population-by-x-and-o
(for [i (range population-size)]
(let [genetic-operator (rand) ; the probability of choosing a certain
; genetic operator
token (if (even? i) ; alternate between:
"X" ; token X, and
"O")] ; token Y, for the new players in the population.
; this ensures that the number of X and O players
; remains similar.
(cond
(< genetic-operator mutation-ratio) ; mutation
(let [parent (fitness-proportionate-selection population)
strategy (get @parent :strategy)]
(create-player token (mutate strategy)))
(< genetic-operator (+ mutation-ratio crossover-ratio)) ; crossover
(let [parent1 (fitness-proportionate-selection population)
parent2 (fitness-proportionate-selection population)
strategy1 (get @parent1 :strategy)
strategy2 (get @parent2 :strategy)]
(create-player token (crossover strategy1 strategy2)))
:else (fitness-proportionate-selection population))))))))))) ; reproduction
(defn genetic-programming
"The main function."
[]
(evolve 50 20 0.5 0.4))
(genetic-programming)