@@ -28,7 +28,7 @@ class Greedy(Player):
28
28
"manipulates_state" : False ,
29
29
}
30
30
31
- UNIFORM = np . inf # constant that replaces weight when rewards aren't weighted
31
+ UNIFORM = - 1.0 # constant that replaces weight when rewards aren't weighted
32
32
33
33
def __init__ (
34
34
self ,
@@ -46,16 +46,16 @@ def __init__(
46
46
recency_weight
47
47
0.0 <= recency_weight <= 1.0
48
48
The exponential recency weight used in calculating the average reward.
49
- If this argument is not provided, the player will not weigh rewards based on recency.
49
+ If this argument is equal to -1 or is not provided, the player will not weigh rewards based on recency.
50
50
"""
51
51
super ().__init__ ()
52
52
self ._rewards = {C : init_c_reward , D : init_d_reward }
53
53
self .weight = recency_weight
54
54
55
- # treat out of range values as extremes
56
- if self .weight <= 0 :
55
+ # limit parameter value range
56
+ if ( self . weight != self . UNIFORM ) and self .weight <= 0 :
57
57
self .weight = 0.0
58
- if ( not np . isinf ( self .weight )) and ( self . weight >= 1 ) :
58
+ if self .weight >= 1 :
59
59
self .weight = 1.0
60
60
61
61
def update_rewards (self , opponent : Player ):
@@ -66,7 +66,7 @@ def update_rewards(self, opponent: Player):
66
66
last_score = game .score (last_round )[0 ]
67
67
68
68
# if UNIFORM, use 1 / total number of times the updated action was taken previously
69
- if np . isinf ( self .weight ) :
69
+ if self .weight == self . UNIFORM :
70
70
weight = 1 / (
71
71
self .history .cooperations if last_play == C else self .history .defections
72
72
)
@@ -78,7 +78,6 @@ def update_rewards(self, opponent: Player):
78
78
)
79
79
80
80
def strategy (self , opponent : Player ) -> Action :
81
- """Actual strategy definition that determines player's action."""
82
81
# if not the first turn
83
82
if len (self .history ) != 0 :
84
83
self .update_rewards (opponent )
@@ -89,7 +88,7 @@ def strategy(self, opponent: Player) -> Action:
89
88
90
89
class EpsilonGreedy (Greedy ):
91
90
"""
92
- Has a 1 - epsilon probability of behaving like Greedy(), and plays randomly otherwise .
91
+ Has a 1 - epsilon probability of behaving like Greedy; otherwise, randomly chooses to cooperate or defect.
93
92
94
93
Names:
95
94
@@ -144,7 +143,6 @@ def _post_init(self):
144
143
self .classifier ["stochastic" ] = False
145
144
146
145
def strategy (self , opponent : Player ) -> Action :
147
- """Actual strategy definition that determines player's action."""
148
146
# this will also update the reward appropriately
149
147
greedy_action = super ().strategy (opponent )
150
148
0 commit comments