
Commit 942fe56

Add an option for a local reward that just computes speed of the AV and its follower
1 parent 91144ca commit 942fe56

4 files changed: +46 -33 lines changed

examples/exp_configs/non_rl/i210_subnetwork.py

Lines changed: 1 addition & 1 deletion
@@ -101,7 +101,7 @@
 edge_id = "119257908#1-AddedOnRampEdge"
 custom_callables = {
     "avg_merge_speed": lambda env: np.nan_to_num(np.mean(
-        env.k.vehicle.get_speed(env.k.vehicle.get_ids_by_edge(edge_id)))),
+        env.k.vehicle.get_speed(env.k.vehicle.get_ids()))),
     "avg_outflow": lambda env: np.nan_to_num(
         env.k.vehicle.get_outflow_rate(120)),
     # we multiply by 5 to account for the vehicle length and by 1000 to convert
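For context on the callable changed above: avg_merge_speed now averages over every vehicle id returned by get_ids() rather than only the ids on the on-ramp edge, and np.nan_to_num is what keeps the metric finite when the id list is empty, since np.mean of an empty sequence evaluates to nan. A minimal numpy-only sketch with hypothetical speed values:

import numpy as np

speeds = []                              # e.g. no vehicles in the network yet
print(np.nan_to_num(np.mean(speeds)))    # 0.0 (np.mean([]) is nan, mapped to 0)

speeds = [24.3, 25.1, 23.8]              # hypothetical speeds in m/s
print(np.nan_to_num(np.mean(speeds)))    # 24.4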

examples/exp_configs/rl/multiagent/multiagent_i210.py

Lines changed: 0 additions & 1 deletion
@@ -5,7 +5,6 @@
 """
 import os
 
-from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy
 from ray.tune.registry import register_env
 
 from flow.controllers import RLController

examples/train.py

Lines changed: 2 additions & 2 deletions
@@ -22,7 +22,6 @@
 
 import ray
 from ray import tune
-from ray.tune import run_experiments
 from ray.tune.registry import register_env
 try:
     from ray.rllib.agents.agent import get_agent_class
@@ -36,9 +35,9 @@
 from flow.utils.registry import make_create_env
 
 
-
 def parse_args(args):
     """Parse training options user can specify in command line.
+
     Returns
     -------
     argparse.Namespace
@@ -140,6 +139,7 @@ def setup_exps_rllib(flow_params,
                      policies_to_train=None,
                      ):
     """Return the relevant components of an RLlib experiment.
+
     Parameters
     ----------
     flow_params : dict

flow/envs/multiagent/i210.py

Lines changed: 43 additions & 29 deletions
@@ -16,6 +16,8 @@
     "max_decel": 1,
     # whether we use an obs space that contains adjacent lane info or just the lead obs
     "lead_obs": True,
+    # whether the reward should come from local vehicles instead of global rewards
+    "local_reward": True
 }
 
 
@@ -137,35 +139,47 @@ def compute_reward(self, rl_actions, **kwargs):
             return {}
 
         rewards = {}
-        for rl_id in self.k.vehicle.get_rl_ids():
-            if self.env_params.evaluate:
-                # reward is speed of vehicle if we are in evaluation mode
-                reward = self.k.vehicle.get_speed(rl_id)
-            elif kwargs['fail']:
-                # reward is 0 if a collision occurred
-                reward = 0
-            else:
-                # reward high system-level velocities
-                cost1 = average_velocity(self, fail=kwargs['fail'])
-
-                # penalize small time headways
-                cost2 = 0
-                t_min = 1  # smallest acceptable time headway
-
-                lead_id = self.k.vehicle.get_leader(rl_id)
-                if lead_id not in ["", None] \
-                        and self.k.vehicle.get_speed(rl_id) > 0:
-                    t_headway = max(
-                        self.k.vehicle.get_headway(rl_id) /
-                        self.k.vehicle.get_speed(rl_id), 0)
-                    cost2 += min((t_headway - t_min) / t_min, 0)
-
-                # weights for cost1, cost2, and cost3, respectively
-                eta1, eta2 = 1.00, 0.10
-
-                reward = max(eta1 * cost1 + eta2 * cost2, 0)
-
-            rewards[rl_id] = reward
+        if self.env_params.additional_params["local_reward"]:
+            for rl_id in self.k.vehicle.get_rl_ids():
+                rewards[rl_id] = 0
+                speeds = []
+                follow_speed = self.k.vehicle.get_speed(self.k.vehicle.get_follower(rl_id))
+                speeds.extend([speed for speed in follow_speed if speed >= 0])
+                if self.k.vehicle.get_speed(rl_id) >= 0:
+                    speeds.append(self.k.vehicle.get_speed(rl_id))
+                if len(speeds) > 0:
+                    # rescale so the q function can estimate it quickly
+                    rewards[rl_id] = np.mean(speeds) / 500.0
+        else:
+            for rl_id in self.k.vehicle.get_rl_ids():
+                if self.env_params.evaluate:
+                    # reward is speed of vehicle if we are in evaluation mode
+                    reward = self.k.vehicle.get_speed(rl_id)
+                elif kwargs['fail']:
+                    # reward is 0 if a collision occurred
+                    reward = 0
+                else:
+                    # reward high system-level velocities
+                    cost1 = average_velocity(self, fail=kwargs['fail'])
+
+                    # penalize small time headways
+                    cost2 = 0
+                    t_min = 1  # smallest acceptable time headway
+
+                    lead_id = self.k.vehicle.get_leader(rl_id)
+                    if lead_id not in ["", None] \
+                            and self.k.vehicle.get_speed(rl_id) > 0:
+                        t_headway = max(
+                            self.k.vehicle.get_headway(rl_id) /
+                            self.k.vehicle.get_speed(rl_id), 0)
+                        cost2 += min((t_headway - t_min) / t_min, 0)
+
+                    # weights for cost1, cost2, and cost3, respectively
+                    eta1, eta2 = 1.00, 0.10
+
+                    reward = max(eta1 * cost1 + eta2 * cost2, 0)
+
+                rewards[rl_id] = reward
         return rewards
 
     def additional_command(self):
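Taken out of the environment for illustration, the new local_reward branch above rewards each AV with the mean of its own speed and its follower's speed, divided by 500 so the Q function sees a small, well-scaled target. A minimal sketch under the assumption that negative speeds are the kernel's sentinel for missing vehicles (mirroring the speed >= 0 checks in the diff); the function name and the sample values are hypothetical:

import numpy as np

def local_reward(av_speed, follower_speeds):
    # keep only valid (non-negative) speeds, as in the diff
    speeds = [s for s in follower_speeds if s >= 0]
    if av_speed >= 0:
        speeds.append(av_speed)
    # rescale so the Q function can estimate it quickly
    return np.mean(speeds) / 500.0 if speeds else 0.0

# hypothetical values: AV at 24 m/s, follower at 22 m/s
print(local_reward(24.0, [22.0]))    # (24 + 22) / 2 / 500 = 0.046
# no valid follower speed behind the AV yet
print(local_reward(24.0, [-1.0]))    # 24 / 500 = 0.048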
