@@ -144,6 +144,7 @@ def __init__(
144
144
observation_height = 96 ,
145
145
visualization_width = 680 ,
146
146
visualization_height = 680 ,
147
+ randomize_goal = False ,
147
148
):
148
149
super ().__init__ ()
149
150
# Observations
@@ -167,6 +168,13 @@ def __init__(
167
168
self .block_cog = block_cog
168
169
self .damping = damping
169
170
171
+ # Randomization
172
+ self .randomize_goal = randomize_goal
173
+ # Safe margins from walls for positioning objects
174
+ self .margin = 100 # Margin from walls to avoid spawning too close to edges
175
+ self .min_pos = np .array ([self .margin , self .margin ])
176
+ self .max_pos = np .array ([512 - self .margin , 512 - self .margin ])
177
+
170
178
# If human-rendering is used, `self.window` will be a reference
171
179
# to the window that we draw to. `self.clock` will be a clock that is used
172
180
# to ensure that the environment is rendered at the correct framerate in
@@ -269,18 +277,27 @@ def reset(self, seed=None, options=None):
269
277
super ().reset (seed = seed )
270
278
self ._setup ()
271
279
280
+ # Randomize goal if enabled
281
+ if self .randomize_goal :
282
+ # Randomize goal position and orientation
283
+ goal_x = self .np_random .uniform (self .min_pos [0 ], self .max_pos [0 ])
284
+ goal_y = self .np_random .uniform (self .min_pos [1 ], self .max_pos [1 ])
285
+ goal_theta = self .np_random .uniform (0 , 2 * np .pi )
286
+ self .goal_pose = np .array ([goal_x , goal_y , goal_theta ])
287
+
288
+ # Use the state passed via options["reset_to_state"] if given; otherwise sample a random start state
272
289
if options is not None and options .get ("reset_to_state" ) is not None :
273
290
state = np .array (options .get ("reset_to_state" ))
274
291
else :
275
292
# state = self.np_random.uniform(low=[50, 50, 100, 100, -np.pi], high=[450, 450, 400, 400, np.pi])
276
293
rs = np .random .RandomState (seed = seed )
277
294
state = np .array (
278
295
[
279
- rs . randint ( 50 , 450 ),
280
- rs . randint ( 50 , 450 ),
281
- rs . randint ( 100 , 400 ),
282
- rs . randint ( 100 , 400 ),
283
- rs . randn () * 2 * np .pi - np . pi ,
296
+ self . np_random . uniform ( self . min_pos [ 0 ], self . max_pos [ 0 ]), # agent_x
297
+ self . np_random . uniform ( self . min_pos [ 1 ], self . max_pos [ 1 ]), # agent_y
298
+ self . np_random . uniform ( self . min_pos [ 0 ], self . max_pos [ 0 ]), # block_x
299
+ self . np_random . uniform ( self . min_pos [ 1 ], self . max_pos [ 1 ]), # block_y
300
+ self . np_random . uniform ( 0 , 2 * np .pi ), # block_angle
284
301
],
285
302
# dtype=np.float64
286
303
)
@@ -446,6 +463,7 @@ def _setup(self):
446
463
# Add agent, block, and goal zone
447
464
self .agent = self .add_circle (self .space , (256 , 400 ), 15 )
448
465
self .block , self ._block_shapes = self .add_tee (self .space , (256 , 300 ), 0 )
466
+ # Default goal pose; reset() overwrites it after calling _setup() when randomize_goal is enabled
449
467
self .goal_pose = np .array ([256 , 256 , np .pi / 4 ]) # x, y, theta (in radians)
450
468
if self .block_cog is not None :
451
469
self .block .center_of_gravity = self .block_cog
0 commit comments