Improved sensors:

gaorkl · gaorkl · commit d9472cc70267 · 2020-07-28T11:03:06.000+01:00
- cropped image for visual sensor to improve speed
- corrected calculation of depth and touch
- game engine now has to be reset outside of step function, to improve compatibility with other libraries
diff --git a/README.md b/README.md
@@ -12,6 +12,9 @@ and runs experiments very quickly.
 
 # Installation
 
+Before installing, you might have to install libsdl1.2-dev and pygame manually.
+
+Once these dependencies are installed, you can install simple-playgrounds using pip.
 A pip package is available and regularly updated:
 
 `pip3 install simple-playgrounds`
diff --git a/setup.py b/setup.py
@@ -6,7 +6,7 @@
 
 setup(
     name='simple_playgrounds',
-    version='0.9.16',
+    version='0.9.17',
     description='Simulator for AGI and RL',
     author='Michael Garcia Ortiz',
     author_email='michael.garcia-ortiz@city.ac.uk',
diff --git a/simple_playgrounds/entities/agents/sensors/visual_sensors/collection/depth_sensor.py b/simple_playgrounds/entities/agents/sensors/visual_sensors/collection/depth_sensor.py
@@ -29,9 +29,9 @@ def update_sensor(self, img):
 
         mask = self.polar_view != 0
         sensor = np.min(np.where(mask.any(axis=1), mask.argmax(axis=1),
-                                 self.polar_view.shape[1] - 1), axis=1)
+                                 self.polar_view.shape[1] ), axis=1)
 
-        sensor_value = (self.polar_view.shape[1] - sensor)
+        sensor_value = self._range*(self.polar_view.shape[1] - sensor)/self.polar_view.shape[1]
 
         image = np.asarray(sensor_value)
         image = np.expand_dims(image, 0)
diff --git a/simple_playgrounds/entities/agents/sensors/visual_sensors/collection/touch_sensor.py b/simple_playgrounds/entities/agents/sensors/visual_sensors/collection/touch_sensor.py
@@ -35,7 +35,7 @@ def __init__(self, anchor, invisible_elements=None, normalize=True, **kwargs):
         """
 
         super(TouchSensor, self).__init__(anchor, invisible_elements, normalize=normalize,
-                                          min_range=anchor.radius+1, **kwargs)
+                                          min_range=anchor.radius, **kwargs)
 
         self._range = self._min_range + self._range
 
@@ -53,8 +53,7 @@ def update_sensor(self, img):
             sensor = np.min(np.where(mask.any(axis=1), mask.argmax(axis=1),
                                      self.polar_view.shape[1] ), axis=1)
 
-            sensor_value = (self.polar_view.shape[1] - sensor)
-
+            sensor_value = (self._range - self._min_range) * (self.polar_view.shape[1] - sensor )/self.polar_view.shape[1]
             image = np.asarray(sensor_value)
             image = np.expand_dims(image, 0)
 
@@ -69,7 +68,7 @@ def update_sensor(self, img):
     def apply_normalization(self):
         if self.normalize:
 
-            self.sensor_value = self.sensor_value /(self._range - self._min_range)/(2*self._scale_ratio)
+            self.sensor_value = self.sensor_value /(self._range - self._min_range)
 
     @property
     def shape(self):
diff --git a/simple_playgrounds/entities/agents/sensors/visual_sensors/visual_sensor.py b/simple_playgrounds/entities/agents/sensors/visual_sensors/visual_sensor.py
@@ -98,11 +98,58 @@ def _crop_image(self, img):
         # # Position of the sensor
         sensor_x, sensor_y = self.anchor.pm_body.position
 
-        x_1 = int(max(0, (width - sensor_x) - self._range))
-        x_2 = int(min(width, (width - sensor_x) + self._range))
+        sensor_angle = (self.anchor.pm_body.angle + math.pi/2)%(2*math.pi)
 
-        y_1 = int(max(0, (height - sensor_y) - self._range))
-        y_2 = int(min(height, (height - sensor_y) + self._range))
+        theta_left = (sensor_angle + self._fov / 2.0)%(2*math.pi)
+        theta_right = (sensor_angle - self._fov / 2.0)%(2*math.pi)
+
+        pos_left = ( self._range * math.cos(theta_left), self._range * math.sin(theta_left) )
+        pos_right = ( self._range * math.cos(theta_right), self._range * math.sin(theta_right) )
+
+        pts_extrema = [pos_left, pos_right, (0,0)]
+
+        # angle 0
+        angles = [0, math.pi/2, 2*math.pi/2, 3*math.pi/2]
+
+        for angle in angles:
+
+            pt = (self._range * math.cos(angle), self._range * math.sin(angle))
+
+            if self._fov == 2*math.pi:
+                pts_extrema.append(pt)
+
+            elif angle == 0:
+
+                if theta_right > theta_left:
+                    pts_extrema.append(pt)
+
+            else:
+
+                if theta_left >= angle:
+
+                    if theta_right <= angle or theta_left <= theta_right:
+                        pts_extrema.append(pt)
+
+                if theta_left < angle:
+
+                    if theta_left <= theta_right <= angle:
+                        pts_extrema.append(pt)
+
+        # if theta_left <= theta_right <= 2*math.pi:
+        #     theta_right = theta_right - 2*math.pi
+
+        x_min = min([x for x,y in pts_extrema])
+        x_max = max([x for x,y in pts_extrema])
+        y_min = min([y for x,y in pts_extrema])
+        y_max = max([y for x,y in pts_extrema])
+
+        # print(x_min, x_max, y_min, y_max)
+
+        y_1 = int(max(0,  (height - sensor_y) + x_min))
+        y_2 = int(min(height, (height - sensor_y) + x_max))
+
+        x_2 = width - int(max(0, sensor_x + y_min))
+        x_1 = width - int(min(width, sensor_x + y_max))
 
         self._center = (((height - sensor_y) - y_1), ((width - sensor_x) - x_1))
 
diff --git a/simple_playgrounds/game_engine.py b/simple_playgrounds/game_engine.py
@@ -70,8 +70,7 @@ def __init__(self, playground, time_limit=None, replay=False, screen=False):
         self.surface_sensors = pygame.Surface((self.playground.width, self.playground.length))
 
         self.game_on = True
-        self.episode_elapsed_time = 0
-        self.total_elapsed_time = 0
+        self.elapsed_time = 0
 
     def multiple_steps(self, actions, n_steps=1):
         """
@@ -109,16 +108,32 @@ def multiple_steps(self, actions, n_steps=1):
         for agent_name in actions:
             cumulated_rewards[agent_name] = 0
 
-        for _ in range(n_steps-1):
-            self.step(hold_actions)
+        step = 0
+        continue_actions = True
+
+        while step < n_steps and continue_actions:
+
+            if step < n_steps-1:
+                action = hold_actions
+            else:
+                action = last_action
+
+            self._engine_step(action)
 
             for agent in self.agents:
                 cumulated_rewards[agent.name] += agent.reward
 
-        self.step(last_action)
+            step += 1
+
+            reset, terminate = self._handle_terminations()
+
+            if reset or terminate:
+                continue_actions = False
 
         for agent in self.agents:
-            agent.reward += cumulated_rewards[agent.name]
+            agent.reward = cumulated_rewards[agent.name]
+
+        return reset, terminate
 
     def step(self, actions):
         """
@@ -129,37 +144,67 @@ def step(self, actions):
 
         """
 
-        for agent in self.agents:
-            agent.apply_actions_to_body_parts(actions[agent.name])
-
-        self.playground.update(SIMULATION_STEPS)
+        self._engine_step(actions)
 
         # Termination
-        game_reset, game_terminates = self.game_terminated()
+        reset, terminate = self._handle_terminations()
+
+        return reset, terminate
+
+    def _handle_terminations(self):
+
+        reset = False
+        terminate = False
+
+        playground_terminated = self.playground.done
+        reached_time_limit = self._check_time()
+        keyboard_reset, keyboard_quit = self._check_keyboard()
+
+        if keyboard_quit:
+            terminate = True
+
+        elif keyboard_reset:
+            reset = True
+
+        elif playground_terminated:
+
+            if self.replay_until_time_limit:
+                reset = True
+
+            else:
+                terminate = True
 
-        if game_reset:
-            self.reset()
+        elif reached_time_limit:
 
-        if game_terminates:
-            self.game_on = False
-            self.terminate()
+            terminate = True
 
-        self.total_elapsed_time += 1
-        self.episode_elapsed_time += 1
+        return reset, terminate
+
+    def _engine_step(self, actions):
+
+        for agent in self.agents:
+            agent.apply_actions_to_body_parts(actions[agent.name])
+
+        self.playground.update(SIMULATION_STEPS)
+
+        self.elapsed_time += 1
 
     def reset(self):
         """
         Resets the game to its initial state.
 
         """
-        self.episode_elapsed_time = 0
-
         self.playground.reset()
-
         self.game_on = True
 
+    def _check_time(self):
+        if self.elapsed_time >= self.time_limit:
+            return True
+        else:
+            return False
+
 
-    def game_terminated(self):
+    def _check_keyboard(self):
         """
         Tests whether the game came to an end, because of time limit or termination of playground.
 
@@ -170,14 +215,6 @@ def game_terminated(self):
         reset_game = False
         terminate_game = False
 
-        if self.total_elapsed_time == self.time_limit or self.playground.done:
-
-            if self.replay_until_time_limit and self.total_elapsed_time < self.time_limit:
-                reset_game = True
-            else:
-                terminate_game = True
-
-
         if self.screen is not None:
 
             pygame.event.get()
@@ -198,10 +235,7 @@ def game_terminated(self):
             elif pygame.key.get_pressed()[K_r] and self.reset_key_ready is True:
                 self.reset_key_ready = False
 
-                if self.replay_until_time_limit:
-                    reset_game = True
-                else:
-                    terminate_game = True
+                reset_game = True
 
         return reset_game, terminate_game
 
@@ -349,7 +383,7 @@ def run(self, steps=None, with_screen = False, print_rewards = False):
             for agent in self.agents:
                 actions[agent.name] = agent.controller.generate_actions()
 
-            self.step(actions)
+            reset, terminate = self.step(actions)
             self.update_observations()
 
             if with_screen and self.game_on:
@@ -366,12 +400,15 @@ def run(self, steps=None, with_screen = False, print_rewards = False):
                 if steps ==0:
                     continue_for_n_steps = False
 
+            if reset:
+                self.reset()
+
+            if terminate:
+                continue_for_n_steps = False
+                self.terminate()
 
-            # for agent in self.agents:
-            #     print(agent.position, agent.base_platform.pm_body.velocity, agent.base_platform.pm_body.kinetic_energy)
-            #     assert 0 < agent.position[0] < self.playground.size[0]
-            #     assert 0 < agent.position[1] < self.playground.size[1]
 
     def terminate(self):
 
+        self.game_on = False
         pygame.quit()
diff --git a/simple_playgrounds/playgrounds/collection/rl/basic.py b/simple_playgrounds/playgrounds/collection/rl/basic.py
@@ -32,7 +32,7 @@ class Endgoal9Rooms(ConnectedRooms2D):
 
     def __init__(self):
 
-        super().__init__(size = (600, 600), n_rooms=(3,3),wall_type='colorful')
+        super().__init__(size = (600, 600), n_rooms=(3,3), wall_type='colorful')
 
         # Starting area of the agent
         area_start = PositionAreaSampler(center=(300, 300), area_shape='rectangle', width_length=(600, 600))
@@ -52,7 +52,7 @@ def __init__(self):
         super().__init__(size = (200, 100), n_rooms=2, wall_type='colorful')
 
         # Starting area of the agent
-        area_start = PositionAreaSampler(center=(100, 50), area_shape='rectangle', width_length=(200, 100))
+        area_start = PositionAreaSampler(center=(50, 50), area_shape='rectangle', width_length=(100, 100))
         self.agent_starting_area = area_start
 
         # invisible endzone at one corner of the game
diff --git a/simple_playgrounds/playgrounds/configs/playground_default.yml b/simple_playgrounds/playgrounds/configs/playground_default.yml
@@ -22,8 +22,8 @@ colorful :
   n_colors: 20
   delta_uniform: 10
   size_tiles : 10
-  color_min : [ 0, 0, 0 ]
-  color_max: [150, 150, 150 ]
+  color_min : [ 100, 100, 100 ]
+  color_max: [250, 250, 250 ]
 
 wall:
   entity_type: basic
diff --git a/simple_playgrounds/playgrounds/playground.py b/simple_playgrounds/playgrounds/playground.py
@@ -145,9 +145,11 @@ def reset(self):
             entity.reset()
 
         # reset agents
-        #self._remove_agents()
-        for agent in self.agents:
+        # self._remove_agents()
+        for agent in self.agents.copy():
             agent.reset()
+            self.remove_agent(agent)
+            self.add_agent(agent)
             #self.add_agent(agent)
 
         self.done = False
@@ -368,7 +370,7 @@ def remove_scene_element(self, scene_element):
             self._disappeared_scene_elements.append(scene_element)
 
         for elem in self.scene_elements:
-            if elem.entity_type is SceneElementTypes.DISPENSER and scene_element in elem.produced_entities:
+            if elem.entity_type == 'dispenser' and scene_element in elem.produced_entities:
                 elem.produced_entities.remove(scene_element)
 
         for field in self.fields:
diff --git a/tests/test_agents.py b/tests/test_agents.py
@@ -30,7 +30,7 @@ def test_base_agent_on_all_test_playgrounds():
 
         print('Starting testing of ', pg_class.__name__)
 
-        engine = Engine(pg, time_limit=1000, replay=False)
+        engine = Engine(pg, time_limit=1000, replay=True)
         engine.run()
 
         assert 0 < agent.position[0] < pg.size[0]
diff --git a/tests/test_playgrounds.py b/tests/test_playgrounds.py
diff --git a/tutorials/jupyter/03_SceneElements.ipynb b/tutorials/jupyter/03_SceneElements.ipynb
diff --git a/tutorials/jupyter/06_Agents.ipynb b/tutorials/jupyter/06_Agents.ipynb
diff --git a/wrappers/__init__.py b/wrappers/__init__.py
diff --git a/wrappers/stable_wrappers.py b/wrappers/stable_wrappers.py