17
17
18
18
19
19
class DroneRaceEnv (RaceCoreEnv , Env ):
20
+ """Single-agent drone racing environment."""
21
+
20
22
def __init__ (
21
23
self ,
22
24
freq : int ,
@@ -31,6 +33,21 @@ def __init__(
31
33
max_episode_steps : int = 1500 ,
32
34
device : Literal ["cpu" , "gpu" ] = "cpu" ,
33
35
):
36
+ """Initialize the single-agent drone racing environment.
37
+
38
+ Args:
39
+ freq: Environment step frequency.
40
+ sim_config: Simulation configuration.
41
+ sensor_range: Sensor range.
42
+ action_space: Control mode for the drones. See `build_action_space` for details.
43
+ track: Track configuration.
44
+ disturbances: Disturbance configuration.
45
+ randomizations: Randomization configuration.
46
+ random_resets: Flag to reset the environment randomly.
47
+ seed: Random seed.
48
+ max_episode_steps: Maximum number of steps per episode.
49
+ device: Device used for the environment and the simulation.
50
+ """
34
51
super ().__init__ (
35
52
n_envs = 1 ,
36
53
n_drones = 1 ,
@@ -52,19 +69,38 @@ def __init__(
52
69
self .autoreset = False
53
70
54
71
def reset(self, seed: int | None = None, options: dict | None = None) -> tuple[dict, dict]:
    """Reset the environment.

    Args:
        seed: Random seed.
        options: Additional reset options. Not used.

    Returns:
        The initial observation and info.
    """
    batched_obs, batched_info = super().reset(seed=seed, options=options)
    # The core env is batched over (n_envs, n_drones); this env has exactly one
    # of each, so strip both leading axes before handing the data to the user.
    single_obs = {key: value[0, 0] for key, value in batched_obs.items()}
    single_info = {key: value[0, 0] for key, value in batched_info.items()}
    return single_obs, single_info
59
85
60
86
def step(self, action: NDArray[np.floating]) -> tuple[dict, float, bool, bool, dict]:
    """Step the environment.

    Args:
        action: Action for the drone.

    Returns:
        Observation, reward, terminated, truncated, and info.
    """
    batched_obs, batched_reward, batched_term, batched_trunc, batched_info = super().step(action)
    # Select the single world/drone entry from the batched results and convert
    # the scalar array entries to plain Python types for the Gymnasium API.
    single_obs = {key: value[0, 0] for key, value in batched_obs.items()}
    single_info = {key: value[0, 0] for key, value in batched_info.items()}
    return (
        single_obs,
        float(batched_reward[0, 0]),
        bool(batched_term[0, 0]),
        bool(batched_trunc[0, 0]),
        single_info,
    )
65
99
66
100
67
101
class VecDroneRaceEnv (RaceCoreEnv , VectorEnv ):
102
+ """Vectorized single-agent drone racing environment."""
103
+
68
104
def __init__ (
69
105
self ,
70
106
num_envs : int ,
@@ -80,6 +116,22 @@ def __init__(
80
116
max_episode_steps : int = 1500 ,
81
117
device : Literal ["cpu" , "gpu" ] = "cpu" ,
82
118
):
119
+ """Initialize the vectorized single-agent drone racing environment.
120
+
121
+ Args:
122
+ num_envs: Number of worlds in the vectorized environment.
123
+ freq: Environment step frequency.
124
+ sim_config: Simulation configuration.
125
+ sensor_range: Sensor range.
126
+ action_space: Control mode for the drones. See `build_action_space` for details.
127
+ track: Track configuration.
128
+ disturbances: Disturbance configuration.
129
+ randomizations: Randomization configuration.
130
+ random_resets: Flag to reset the environment randomly.
131
+ seed: Random seed.
132
+ max_episode_steps: Maximum number of steps per episode.
133
+ device: Device used for the environment and the simulation.
134
+ """
83
135
super ().__init__ (
84
136
n_envs = num_envs ,
85
137
n_drones = 1 ,
@@ -102,12 +154,31 @@ def __init__(
102
154
self .observation_space = batch_space (self .single_observation_space , num_envs )
103
155
104
156
def reset(self, seed: int | None = None, options: dict | None = None) -> tuple[dict, dict]:
    """Reset the environment in all worlds.

    Args:
        seed: Random seed.
        options: Additional reset options. Not used.

    Returns:
        The initial observation and info.
    """
    batched_obs, batched_info = super().reset(seed=seed, options=options)
    # Keep the n_envs axis but drop the drone axis: this vectorized env runs a
    # single drone per world, so index 0 on the second dimension.
    vec_obs = {key: value[:, 0] for key, value in batched_obs.items()}
    vec_info = {key: value[:, 0] for key, value in batched_info.items()}
    return vec_obs, vec_info
109
170
110
- def step (self , action : NDArray [np .floating ]) -> tuple [dict , float , bool , bool , dict ]:
171
+ def step (
172
+ self , action : NDArray [np .floating ]
173
+ ) -> tuple [dict , NDArray [np .floating ], NDArray [np .bool_ ], NDArray [np .bool_ ], dict ]:
174
+ """Step the environment in all worlds.
175
+
176
+ Args:
177
+ action: Action for all worlds, i.e., a batch of (n_envs, action_dim) arrays.
178
+
179
+ Returns:
180
+ Observation, reward, terminated, truncated, and info.
181
+ """
111
182
obs , reward , terminated , truncated , info = super ().step (action )
112
183
obs = {k : v [:, 0 ] for k , v in obs .items ()}
113
184
info = {k : v [:, 0 ] for k , v in info .items ()}
0 commit comments