-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplot_epsilons.py
45 lines (38 loc) · 1.43 KB
/
plot_epsilons.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# Run this in Spyder
"""
Problem 3:
Develop a 10-armed bandit in which all ten mean-rewards start out equal and
then take independent random walks (by adding a normally distributed increment
with mean zero and standard deviation 0.01 to all mean-rewards on each time step).
{function [value] = bandit_nonstat(action)}
"""
from Environment import N_Armed_Bandit
from Agent import Agents
import matplotlib.pyplot as plt
if __name__ == "__main__":
    # Epsilon sweep: train one agent per exploration rate, each on a freshly
    # constructed bandit environment, then plot whatever train() returns for
    # every run on a single figure.

    # --- Environment settings (passed positionally to N_Armed_Bandit) ---
    no_arms = 10
    random_state = 100
    _mean = 0
    _std = 0.01
    # NOTE(review): "--|--" looks like a placeholder meaning "no manual
    # rewards"; the original comment suggests a list such as [1, 0] would
    # override the random reward assignment -- confirm in Environment.py.
    manual_reward = "--|--"

    # --- Agent / training settings ---
    agent_name = "epsilon_greedy"
    discount_factor = 0.7  # future reward importance
    no_iterations = 1000
    verbose = 0

    # Single source of truth for the sweep: these values drive both the
    # training runs and the legend labels, so the two cannot drift apart.
    epsilons = [0, 0.01, 0.1, 0.5, 0.9]

    avg_rewards = []
    for epsilon in epsilons:
        # A new environment is built for every run, always with the same
        # random_state argument.
        env = N_Armed_Bandit(
            no_arms, random_state, _mean, _std, manual_reward=manual_reward
        )
        agent = Agents(agent_name, epsilon, discount_factor, env)
        # presumably train() returns a per-iteration sequence of average
        # rewards (it is handed straight to plt.plot) -- verify in Agent.py.
        avg_rewards.append(agent.train(no_iterations, verbose))

    print("This is avg_rewards")

    # One curve per epsilon, labelled from the same list used for training.
    for epsilon, rewards in zip(epsilons, avg_rewards):
        plt.plot(rewards, label="epsilon:{}".format(epsilon))

    plt.ylabel("avg reward")
    plt.xlabel("epochs")
    plt.legend(loc="upper right")
    plt.show()