16 | 16 |     "max_decel": 1,
17 | 17 |     # whether we use an obs space that contains adjacent lane info or just the lead obs
18 | 18 |     "lead_obs": True,
| 19 | +     # whether the reward should come from local vehicles instead of global rewards
| 20 | +     "local_reward": True
19 | 21 | }
20 | 22 |
21 | 23 |
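For context, a minimal sketch of how the new flag might be toggled when building an experiment config. This assumes the usual Flow EnvParams pattern and that ADDITIONAL_ENV_PARAMS is the dict edited above; the import path and experiment setup are not part of this diff.

from copy import deepcopy

from flow.core.params import EnvParams
# assumed import path for the env module touched in this diff
from flow.envs.multiagent.highway import ADDITIONAL_ENV_PARAMS

# start from the module defaults and flip the new switch explicitly
additional_params = deepcopy(ADDITIONAL_ENV_PARAMS)
additional_params["local_reward"] = True  # False falls back to the global reward

env_params = EnvParams(additional_params=additional_params)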
@@ -137,35 +139,47 @@ def compute_reward(self, rl_actions, **kwargs):
137 | 139 |             return {}
138 | 140 |
139 | 141 |         rewards = {}
140 | | -         for rl_id in self.k.vehicle.get_rl_ids():
141 | | -             if self.env_params.evaluate:
142 | | -                 # reward is speed of vehicle if we are in evaluation mode
143 | | -                 reward = self.k.vehicle.get_speed(rl_id)
144 | | -             elif kwargs['fail']:
145 | | -                 # reward is 0 if a collision occurred
146 | | -                 reward = 0
147 | | -             else:
148 | | -                 # reward high system-level velocities
149 | | -                 cost1 = average_velocity(self, fail=kwargs['fail'])
150 | | -
151 | | -                 # penalize small time headways
152 | | -                 cost2 = 0
153 | | -                 t_min = 1 # smallest acceptable time headway
154 | | -
155 | | -                 lead_id = self.k.vehicle.get_leader(rl_id)
156 | | -                 if lead_id not in ["", None] \
157 | | -                         and self.k.vehicle.get_speed(rl_id) > 0:
158 | | -                     t_headway = max(
159 | | -                         self.k.vehicle.get_headway(rl_id) /
160 | | -                         self.k.vehicle.get_speed(rl_id), 0)
161 | | -                     cost2 += min((t_headway - t_min) / t_min, 0)
162 | | -
163 | | -                 # weights for cost1, cost2, and cost3, respectively
164 | | -                 eta1, eta2 = 1.00, 0.10
165 | | -
166 | | -                 reward = max(eta1 * cost1 + eta2 * cost2, 0)
167 | | -
168 | | -             rewards[rl_id] = reward
| 142 | +         if self.env_params.additional_params["local_reward"]:
| 143 | +             for rl_id in self.k.vehicle.get_rl_ids():
| 144 | +                 rewards[rl_id] = 0
| 145 | +                 speeds = []
| 146 | +                 follow_speed = self.k.vehicle.get_speed(self.k.vehicle.get_follower(rl_id))
| 147 | +                 speeds.extend([speed for speed in [follow_speed] if speed >= 0])
| 148 | +                 if self.k.vehicle.get_speed(rl_id) >= 0:
| 149 | +                     speeds.append(self.k.vehicle.get_speed(rl_id))
| 150 | +                 if len(speeds) > 0:
| 151 | +                     # rescale so the q function can estimate it quickly
| 152 | +                     rewards[rl_id] = np.mean(speeds) / 500.0
| 153 | +         else:
| 154 | +             for rl_id in self.k.vehicle.get_rl_ids():
| 155 | +                 if self.env_params.evaluate:
| 156 | +                     # reward is speed of vehicle if we are in evaluation mode
| 157 | +                     reward = self.k.vehicle.get_speed(rl_id)
| 158 | +                 elif kwargs['fail']:
| 159 | +                     # reward is 0 if a collision occurred
| 160 | +                     reward = 0
| 161 | +                 else:
| 162 | +                     # reward high system-level velocities
| 163 | +                     cost1 = average_velocity(self, fail=kwargs['fail'])
| 164 | +
| 165 | +                     # penalize small time headways
| 166 | +                     cost2 = 0
| 167 | +                     t_min = 1 # smallest acceptable time headway
| 168 | +
| 169 | +                     lead_id = self.k.vehicle.get_leader(rl_id)
| 170 | +                     if lead_id not in ["", None] \
| 171 | +                             and self.k.vehicle.get_speed(rl_id) > 0:
| 172 | +                         t_headway = max(
| 173 | +                             self.k.vehicle.get_headway(rl_id) /
| 174 | +                             self.k.vehicle.get_speed(rl_id), 0)
| 175 | +                         cost2 += min((t_headway - t_min) / t_min, 0)
| 176 | +
| 177 | +                     # weights for cost1 and cost2, respectively
| 178 | +                     eta1, eta2 = 1.00, 0.10
| 179 | +
| 180 | +                     reward = max(eta1 * cost1 + eta2 * cost2, 0)
| 181 | +
| 182 | +                 rewards[rl_id] = reward
169 | 183 |         return rewards
170 | 184 |
171 | 185 |     def additional_command(self):
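As a sanity check on the scaling in the new local-reward branch, here is a small self-contained sketch of the arithmetic. The speeds are illustrative values, not numbers taken from this change.

import numpy as np

# e.g. ego vehicle at 25 m/s and its follower at 20 m/s; negative speeds
# (the error value returned for a missing vehicle) are filtered out first
speeds = [s for s in [25.0, 20.0] if s >= 0]

# same rescaling as the new branch: mean speed divided by 500
reward = np.mean(speeds) / 500.0 if speeds else 0.0
print(reward)  # 0.045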