@@ -28,7 +28,7 @@ class Greedy(Player):
2828 "manipulates_state" : False ,
2929 }
3030
31- UNIFORM = np . inf # constant that replaces weight when rewards aren't weighted
31+ UNIFORM = - 1.0 # constant that replaces weight when rewards aren't weighted
3232
3333 def __init__ (
3434 self ,
@@ -46,16 +46,16 @@ def __init__(
4646 recency_weight
4747 0.0 <= recency_weight <= 1.0
4848 The exponential recency weight used in calculating the average reward.
49- If this argument is not provided, the player will not weigh rewards based on recency.
49+ If this argument is equal to -1 or is not provided, the player will not weigh rewards based on recency.
5050 """
5151 super ().__init__ ()
5252 self ._rewards = {C : init_c_reward , D : init_d_reward }
5353 self .weight = recency_weight
5454
55- # treat out of range values as extremes
56- if self .weight <= 0 :
55+ # clamp the weight to [0.0, 1.0], leaving the UNIFORM sentinel untouched
56+ if ( self . weight != self . UNIFORM ) and self .weight <= 0 :
5757 self .weight = 0.0
58- if ( not np . isinf ( self .weight )) and ( self . weight >= 1 ) :
58+ if self .weight >= 1 :
5959 self .weight = 1.0
6060
6161 def update_rewards (self , opponent : Player ):
@@ -66,7 +66,7 @@ def update_rewards(self, opponent: Player):
6666 last_score = game .score (last_round )[0 ]
6767
6868 # if UNIFORM, use 1 / total number of times the updated action was taken previously
69- if np . isinf ( self .weight ) :
69+ if self .weight == self . UNIFORM :
7070 weight = 1 / (
7171 self .history .cooperations if last_play == C else self .history .defections
7272 )
@@ -78,7 +78,6 @@ def update_rewards(self, opponent: Player):
7878 )
7979
8080 def strategy (self , opponent : Player ) -> Action :
81- """Actual strategy definition that determines player's action."""
8281 # if not the first turn
8382 if len (self .history ) != 0 :
8483 self .update_rewards (opponent )
@@ -89,7 +88,7 @@ def strategy(self, opponent: Player) -> Action:
8988
9089class EpsilonGreedy (Greedy ):
9190 """
92- Has a 1 - epsilon probability of behaving like Greedy(), and plays randomly otherwise .
91+ Has a 1 - epsilon probability of behaving like Greedy; otherwise, randomly chooses to cooperate or defect.
9392
9493 Names:
9594
@@ -144,7 +143,6 @@ def _post_init(self):
144143 self .classifier ["stochastic" ] = False
145144
146145 def strategy (self , opponent : Player ) -> Action :
147- """Actual strategy definition that determines player's action."""
148146 # this will also update the reward appropriately
149147 greedy_action = super ().strategy (opponent )
150148
0 commit comments