Style changes in docstrings and comments; renamed the armed_bandits module to bandits; updated the strategy-count doctest in docs/index.rst; changed the UNIFORM constant from np.inf to -1.

bing-j · marcharper · commit 8b17e01fbbec · 2024-07-12T14:39:32.000-07:00
diff --git a/axelrod/strategies/_strategies.py b/axelrod/strategies/_strategies.py
@@ -90,7 +90,7 @@
 from .dbs import DBS
 from .defector import Defector, TrickyDefector
 from .doubler import Doubler
-from .armed_bandits import Greedy, EpsilonGreedy
+from .bandits import Greedy, EpsilonGreedy
 from .finite_state_machines import (
     TF1,
     TF2,
diff --git a/axelrod/strategies/bandits.py b/axelrod/strategies/bandits.py
@@ -28,7 +28,7 @@ class Greedy(Player):
         "manipulates_state": False,
     }
 
-    UNIFORM = np.inf  # constant that replaces weight when rewards aren't weighted
+    UNIFORM = -1.0  # constant that replaces weight when rewards aren't weighted
 
     def __init__(
         self,
@@ -46,16 +46,16 @@ def __init__(
         recency_weight
             0.0 <= recency_weight <= 1.0
             The exponential recency weight used in calculating the average reward.
-            If this argument is not provided, the player will not weigh rewards based on recency.
+            If this argument is equal to -1 or is not provided, the player will not weigh rewards based on recency.
         """
         super().__init__()
         self._rewards = {C: init_c_reward, D: init_d_reward}
         self.weight = recency_weight
 
-        # treat out of range values as extremes
-        if self.weight <= 0:
+        # clamp the weight to [0.0, 1.0], leaving the UNIFORM sentinel unchanged
+        if (self.weight != self.UNIFORM) and self.weight <= 0:
             self.weight = 0.0
-        if (not np.isinf(self.weight)) and (self.weight >= 1):
+        if self.weight >= 1:
             self.weight = 1.0
 
     def update_rewards(self, opponent: Player):
@@ -66,7 +66,7 @@ def update_rewards(self, opponent: Player):
         last_score = game.score(last_round)[0]
 
         # if UNIFORM, use 1 / total number of times the updated action was taken previously
-        if np.isinf(self.weight):
+        if self.weight == self.UNIFORM:
             weight = 1 / (
                 self.history.cooperations if last_play == C else self.history.defections
             )
@@ -78,7 +78,6 @@ def update_rewards(self, opponent: Player):
         )
 
     def strategy(self, opponent: Player) -> Action:
-        """Actual strategy definition that determines player's action."""
         # if not the first turn
         if len(self.history) != 0:
             self.update_rewards(opponent)
@@ -89,7 +88,7 @@ def strategy(self, opponent: Player) -> Action:
 
 class EpsilonGreedy(Greedy):
     """
-    Has a 1 - epsilon probability of behaving like Greedy(), and plays randomly otherwise.
+    Has a 1 - epsilon probability of behaving like Greedy; otherwise, randomly chooses to cooperate or defect.
 
     Names:
 
@@ -144,7 +143,6 @@ def _post_init(self):
             self.classifier["stochastic"] = False
 
     def strategy(self, opponent: Player) -> Action:
-        """Actual strategy definition that determines player's action."""
         # this will also update the reward appropriately
         greedy_action = super().strategy(opponent)
 
diff --git a/axelrod/tests/strategies/test_armed_bandits.py b/axelrod/tests/strategies/test_armed_bandits.py
@@ -89,35 +89,3 @@ def test_strategy(self):
             attrs={"_rewards": {C: 3, D: 0}},
             seed=1,
         )
-
-    # temporary overriding function used to search for seeds
-    # def versus_test(
-    #     self,
-    #     opponent,
-    #     expected_actions,
-    #     turns=None,
-    #     noise=None,
-    #     seed=None,
-    #     match_attributes=None,
-    #     attrs=None,
-    #     init_kwargs=None,
-    # ):
-    #
-    #     if init_kwargs is None:
-    #         init_kwargs = dict()
-    #
-    #     player = self.player(**init_kwargs)
-    #
-    #     test_match = TestMatch()
-    #     seed = test_match.search_seeds(
-    #         player,
-    #         opponent,
-    #         [x for (x, y) in expected_actions],
-    #         [y for (x, y) in expected_actions],
-    #         turns=turns,
-    #         noise=noise,
-    #         seed=seed,
-    #         attrs=attrs,
-    #         match_attributes=match_attributes,
-    #     )
-    #     self.assertIsNotNone(seed)
diff --git a/docs/index.rst b/docs/index.rst
@@ -53,7 +53,7 @@ Count the number of available players::
 
     >>> import axelrod as axl
     >>> len(axl.strategies)
-    240
+    242
 
 Create matches between two players::
 
diff --git a/docs/reference/strategy_index.rst b/docs/reference/strategy_index.rst
@@ -18,7 +18,7 @@ Here are the docstrings of all the strategies in the library.
    :members:
 .. automodule:: axelrod.strategies.appeaser
    :members:
-.. automodule:: axelrod.strategies.armed_bandits
+.. automodule:: axelrod.strategies.bandits
    :members:
 .. automodule:: axelrod.strategies.averagecopier
    :members: