ProjectPYTHON.py
# As described in the PDF file in the same repository
import numpy
class node: # Making a node object lets us easily set up the Markov decision process
def __init__(self):
        self.me = [[[0,0],[0,0]],[[0,0],[0,0]]] # A 2x2x2 cube of bits; the setup loop below overwrites these with '0'/'1' characters
self.forward = []
self.back = []
self.up = []
        self.down = [] # These lists will be populated with each node's directed edges
self.left = []
self.right = []
    def myOnes(self): # A quick method to count how many 1s are in a node
        return sum(int(bit) for layer in self.me for row in layer for bit in row)
def show(self): # Prints the node in a reader-friendly way. The left layer is the 'front'
print("(", self.me[0][0], " ", self.me[1][0], ")")
print("(", self.me[0][1], " ", self.me[1][1], ")")
print()
nodeList = [] # A list for all the nodes
for i in range(0,256):
placeholder = node()
    value = format(i, "08b") # i from 0 to 255, value from "00000000" to "11111111"
placeholder.me = [[[value[0],value[1]],[value[2],value[3]]],[[value[4],value[5]],[value[6],value[7]]]]
nodeList.append(placeholder)
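# For example (an illustration, not in the original file): 200 is 11001000 in
# binary, so nodeList[200].me == [[['1','1'],['0','0']],[['1','0'],['0','0']]]
# and nodeList[200].myOnes() == 3.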
for i in nodeList:
    # Each move keeps half the cube's bits and exposes four new ones;
    # j enumerates the 16 nibbles that could fill the exposed half
    for j in range(0,16):
        value = format(j, "04b")
findMeF = [[[value[0],value[1]],[value[2],value[3]]],[[i.me[0][0][0],i.me[0][0][1]],[i.me[0][1][0],i.me[0][1][1]]]]
findMeB = [[[i.me[1][0][0],i.me[1][0][1]],[i.me[1][1][0],i.me[1][1][1]]],[[value[0],value[1]],[value[2],value[3]]]]
findMeU = [[[value[0],value[1]],[i.me[0][0][0],i.me[0][0][1]]],[[value[2],value[3]],[i.me[1][0][0],i.me[1][0][1]]]]
findMeD = [[[i.me[0][1][0],i.me[0][1][1]],[value[0],value[1]]],[[i.me[1][1][0],i.me[1][1][1]],[value[2],value[3]]]]
findMeR = [[[i.me[0][0][1],value[0]],[i.me[0][1][1],value[1]]],[[i.me[1][0][1],value[2]],[i.me[1][1][1],value[3]]]]
        findMeL = [[[value[0],i.me[0][0][0]],[value[1],i.me[0][1][0]]],[[value[2],i.me[1][0][0]],[value[3],i.me[1][1][0]]]]
for k in nodeList:
if(k.me == findMeF):
i.forward.append(k)
if(k.me == findMeB):
i.back.append(k)
if(k.me == findMeU):
i.up.append(k)
if(k.me == findMeD):
i.down.append(k)
if(k.me == findMeR):
i.right.append(k)
if(k.me == findMeL):
i.left.append(k) # That ugly block adds all directed edges, populating those lists
# At this point, all nodes and their connections are established
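# Note: the matching loop above compares every candidate against all 256 nodes,
# roughly 256 * 16 * 256 equality checks. A dictionary keyed on the flattened
# bit-string (a hypothetical optimisation, not part of the original design)
# would make each lookup O(1):
#
#   byBits = {"".join(n.me[p][r][c] for p in (0, 1) for r in (0, 1)
#                     for c in (0, 1)): n for n in nodeList}
#
# Each findMe pattern could then be flattened the same way and looked up
# directly, e.g. i.forward.append(byBits[flatten(findMeF)]) with a matching
# (hypothetical) flatten helper.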
def makeRandomPolicy(): # Relates each state to a random direction
randomPolicy = []
for i in nodeList:
randomPolicy.append(numpy.random.choice(["F","B","U","D","R","L"]))
return randomPolicy
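# For instance (illustration only): makeRandomPolicy() might return
# ["U", "F", "L", ...], where entry k is the move taken whenever the process
# is in state nodeList[k].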
def tedPolicy(): # Relates each state to the direction with the most 1s
myPolicy = []
for i in nodeList:
expectF = 0
expectB = 0
expectU = 0
expectD = 0
expectR = 0
expectL = 0
for a in i.forward:
expectF += a.myOnes()
for a in i.back:
expectB += a.myOnes()
for a in i.up:
expectU += a.myOnes()
for a in i.down:
expectD += a.myOnes()
for a in i.right:
expectR += a.myOnes()
for a in i.left:
expectL += a.myOnes()
decision = "F"
if(expectB > expectF):
decision = "B"
if(expectU > expectB):
decision = "U"
if(expectD > expectU):
decision = "D"
if(expectR > expectD):
decision = "R"
if(expectL > expectR):
decision = "L"
myPolicy.append(decision) # Defaults to L,R,D,U,B (in that order) in case
# two directions have equal expectation
return myPolicy
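# A worked check (illustration, not in the original): for the all-zero state
# nodeList[0], each direction has 16 successors whose four revealed bits range
# over every nibble from 0000 to 1111. Each bit position is 1 in 8 of the 16
# nibbles, so every expectation is 4 * 8 = 32 and the tie-break yields "F".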
def runProcess(choiceList): # Given a policy, runs the process!
time = 0
state = nodeList[0] # Start on the all-zero node
reward = 0
    while(time < 1000): # The process itself never terminates; we simulate the first 1000 steps
print("Time: ", time)
print()
state.show()
decision = choiceList[nodeList.index(state)]
print(decision)
if(decision == "F"):
state = numpy.random.choice(state.forward)
elif(decision == "B"):
state = numpy.random.choice(state.back)
elif(decision == "U"):
state = numpy.random.choice(state.up)
elif(decision == "D"):
state = numpy.random.choice(state.down)
elif(decision == "R"):
state = numpy.random.choice(state.right)
elif(decision == "L"):
state = numpy.random.choice(state.left)
reward += 2*state.myOnes() - 8 # Total 1s minus total 0s
print()
print("Reward this step: ", 2*state.myOnes() - 8)
print("Total reward: ", reward)
print()
time += 1
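# A minimal demo (not part of the original file): run the process under the
# greedy policy. Swap in makeRandomPolicy() to compare against random play.
if __name__ == "__main__":
    runProcess(tedPolicy())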