
format and other minor changes #26

Draft · wants to merge 11 commits into base: main
172,879 changes: 172,879 additions & 0 deletions MJMODEL.TXT

Large diffs are not rendered by default.

17 changes: 12 additions & 5 deletions README.md
@@ -118,13 +118,20 @@ For significant changes, please open an issue first to discuss what you would li

Currently, our todo list is:

- [X] Load and replay one `LeRobotDataset` in simulation.
- [X] Implement inverse kinematics in each environment, improvements remain very welcome.
- [x] Load and replay one `LeRobotDataset` in simulation
- [x] Implement inverse kinematics in each environment, improvements remain very welcome

Training related:
- [ ] Train policies with SB3
- [ ] Provide reward shaping functions, taking inspiration from Meta-World

Real-world related:
- [ ] Implement controller interface for simulation, like lowcostrobot-leader
- [ ] Provide reward shaping functions, get inspired from meta-world .
- [ ] Implement the real-world interface, seemless interface with real-world observations, motor.
- [ ] Improve the fidelity of the simulation.
- [ ] Implement the real-world interface: a seamless interface to real-world observations and motors

Improve the fidelity of the simulation:
- [ ] Parameter identification (kp, damping, ...)
- [ ] Simplify the gripper mesh for more stable grasp and add collision shape

## License

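On the reward-shaping item in the todo list above: a minimal sketch of what a Meta-World-style dense reward for the push task could look like. This is illustrative only; the helper name and the `ee_pos` / `cube_pos` / `target_pos` inputs are assumptions, not part of the repo's current API.

import numpy as np

def dense_push_reward(ee_pos, cube_pos, target_pos, reach_weight=0.5, push_weight=1.0):
    # Dense shaping: reward approaching the cube and moving it toward the target.
    # All inputs are (3,) world-frame positions; how they are read from the
    # environment (e.g. MuJoCo body poses) is left to the caller.
    reach_dist = np.linalg.norm(ee_pos - cube_pos)
    push_dist = np.linalg.norm(cube_pos - target_pos)
    # Bounded terms in (0, 1], larger as the distances shrink, in the spirit of
    # Meta-World's tanh-based reward utilities.
    reach_term = 1.0 - np.tanh(5.0 * reach_dist)
    push_term = 1.0 - np.tanh(5.0 * push_dist)
    return reach_weight * reach_term + push_weight * push_term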
3 changes: 2 additions & 1 deletion examples/gym_manipulation.py
@@ -1,5 +1,6 @@
import gymnasium as gym
import gym_lowcostrobot

import gym_lowcostrobot # noqa


def do_env_sim():
3 changes: 2 additions & 1 deletion examples/gym_manipulation_sb3.py
@@ -1,12 +1,13 @@
import gymnasium as gym
import gym_lowcostrobot
import torch
from gymnasium.wrappers.filter_observation import FilterObservation
from gymnasium.wrappers.flatten_observation import FlattenObservation
from stable_baselines3 import PPO, TD3
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy

import gym_lowcostrobot # noqa


def do_td3_push():
env = gym.make("PushCube-v0", observation_mode="state", render_mode=None)
13 changes: 5 additions & 8 deletions examples/lerobotdataset_load.py
@@ -1,14 +1,11 @@
import argparse, time
import argparse

import mujoco
import mujoco.viewer

from lerobot.common.datasets.lerobot_dataset import LeRobotDataset



def main():

# You can easily load a dataset from a Hugging Face repository
dataset = LeRobotDataset(args.repo_id)
print(f"\n{dataset[0]['observation.images.cam_high'].shape=}") # (4,c,h,w)
@@ -23,22 +20,22 @@ def main():
print(f"Starting episode {current_episode}")
with mujoco.viewer.launch_passive(model, data) as viewer:
# Run the simulation

step = 0
while viewer.is_running():

if step in dataset.episode_data_index["from"]:
current_episode += 1
mujoco.mj_resetData(model, data)
mujoco.mj_resetData(model, data)
print(f"Starting episode {current_episode}")

# Step the simulation forward
data.ctrl = dataset[step]['observation.state']
data.ctrl = dataset[step]["observation.state"]
mujoco.mj_step(model, data)

viewer.sync()
step += 1


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Replay trajectories from a LeRobotDataset hub repository")
parser.add_argument("--repo_id", type=str, default="thomwolf/blue_sort", help="Hugging Face dataset repository id")
151 changes: 151 additions & 0 deletions examples/mujoco_lift_scripted_policy.py
@@ -0,0 +1,151 @@
import gymnasium as gym
import mujoco
import numpy as np

import gym_lowcostrobot # noqa


def displace_object(env, square_size=0.15, invert_y=False, origin_pos=(0.0, 0.1, 0.01)):
# Sample a position in a square in front of the robot.
# origin_pos is (x, y, z); origin_pos[2] is used as the cube height below
# (the default z matches the cube's initial height in the scene XML).
if not invert_y:
x = np.random.uniform(origin_pos[0] - square_size / 2, origin_pos[0] + square_size / 2)
y = np.random.uniform(origin_pos[1] - square_size / 2, origin_pos[1] + square_size / 2)
else:
x = np.random.uniform(origin_pos[0] + square_size / 2, origin_pos[0] - square_size / 2)
y = np.random.uniform(origin_pos[1] + square_size / 2, origin_pos[1] - square_size / 2)
env.data.qpos[:3] = np.array([x, y, origin_pos[2]])
return env.data.qpos[:3]


class BasePolicy:
def __init__(self, inject_noise=False, init_pose=None, meet_pose=None):
self.inject_noise = inject_noise
self.step_count = 0
self.trajectory = None
self.right_trajectory = None
self.init_pose = init_pose
self.meet_pose = meet_pose

def generate_trajectory(self, init_pose, meet_pose):
raise NotImplementedError

@staticmethod
def interpolate(curr_waypoint, next_waypoint, t):
t_frac = (t - curr_waypoint["t"]) / (next_waypoint["t"] - curr_waypoint["t"])
curr_xyz = curr_waypoint["xyz"]
curr_quat = curr_waypoint["quat"]
curr_grip = curr_waypoint["gripper"]
next_xyz = next_waypoint["xyz"]
next_quat = next_waypoint["quat"]
next_grip = next_waypoint["gripper"]
xyz = curr_xyz + (next_xyz - curr_xyz) * t_frac
quat = curr_quat + (next_quat - curr_quat) * t_frac
gripper = curr_grip + (next_grip - curr_grip) * t_frac
return xyz, quat, gripper

def __call__(self):
# generate trajectory at first timestep, then open-loop execution
if self.step_count == 0:
self.generate_trajectory(self.init_pose, self.meet_pose)

# obtain arm waypoints
if self.trajectory[0]["t"] == self.step_count:
self.curr_waypoint = self.trajectory.pop(0)
next_waypoint = self.trajectory[0]

# interpolate between waypoints to obtain current pose and gripper command
xyz, quat, gripper = self.interpolate(self.curr_waypoint, next_waypoint, self.step_count)

# Inject noise
if self.inject_noise:
scale = 0.01
xyz = xyz + np.random.uniform(-scale, scale, xyz.shape)

action = np.concatenate([xyz, quat, [gripper]])

self.step_count += 1
return action


class LiftCubePolicy(BasePolicy):
def generate_trajectory(self, init_pose, meet_pose):

# box_info = np.array(ts_first.observation['env_state'])
# box_xyz = box_info[:3]
# box_quat = box_info[3:]
# print(f"Generate trajectory for {box_xyz=}")

# gripper_pick_quat = Quaternion(init_mocap_pose_right[3:])
# gripper_pick_quat = gripper_pick_quat * Quaternion(axis=[0.0, 1.0, 0.0], degrees=-60)

meet_xyz = meet_pose[:3]

self.trajectory = [
{"t": 0, "xyz": init_pose[:3], "quat": init_pose[3:], "gripper": 0}, # sleep
{
"t": 800,
"xyz": meet_xyz + np.array([0.0, 0.01, 0.075]),
"quat": meet_pose[3:],
"gripper": -1.5,
}, # approach meet position
{
"t": 1100,
"xyz": meet_xyz + np.array([0.0, 0.01, 0.02]),
"quat": meet_pose[3:],
"gripper": -1.5,
}, # move to meet position
{
"t": 1350,
"xyz": meet_xyz + np.array([0.0, 0.01, 0.02]),
"quat": meet_pose[3:],
"gripper": -0.25,
}, # close gripper
{"t": 1490, "xyz": meet_xyz + np.array([0.0, 0.01, 0.1]), "quat": meet_pose[3:], "gripper": -0.25}, # lift up
{"t": 1500, "xyz": meet_xyz + np.array([0.0, 0.01, 0.1]), "quat": meet_pose[3:], "gripper": -0.25}, # stay
]


def test_policy(task_name):
# setup the environment
if "LiftCube" in task_name:
env = gym.make("LiftCube-v0", render_mode="human", action_mode="ee")
# other tasks can be added here
else:
raise NotImplementedError

NUM_EPISODES = 5
cube_origin_pos = [0.03390873, 0.22571199, 0.04]

for episode_idx in range(NUM_EPISODES):
observation, info = env.reset()
cube_pos = displace_object(env, square_size=0.1, invert_y=False, origin_pos=cube_origin_pos)
# cube_pos = env.unwrapped.data.qpos[:3].astype(np.float32)
ee_id = env.model.body("moving_side").id
ee_pos = env.unwrapped.data.xpos[ee_id].astype(np.float32) # default [0.03390873 0.22571199 0.14506643]
ee_orn = np.zeros(4, dtype=np.float64)
mujoco.mju_mat2Quat(ee_orn, env.unwrapped.data.xmat[ee_id])
# keep orientation constant
init_pose = np.concatenate([ee_pos, ee_orn])
meet_pose = np.concatenate([cube_pos, ee_orn])
policy = LiftCubePolicy(init_pose=init_pose, meet_pose=meet_pose)
episode_length = 1500
for i in range(episode_length):
action = env.action_space.sample()
result = policy()
ee_pos = env.unwrapped.data.xpos[ee_id].astype(np.float32)
action[:3] = result[:3] - ee_pos
action[3] = result[7]
# Step the environment
observation, reward, terminated, truncated, info = env.step(action)

# Reset the environment if it's done
if terminated or truncated:
observation, info = env.reset()
break


if __name__ == "__main__":
test_task_name = "LiftCube"
test_policy(test_task_name)
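A note on BasePolicy.interpolate above: the quaternion is blended component-wise (a plain lerp, without renormalization). That is acceptable here because the waypoints keep the orientation essentially constant, but if waypoints with larger rotations are ever added, a slerp-based variant along these lines might be safer. This is only a sketch; it assumes SciPy is available and that quaternions are handled in SciPy's (x, y, z, w) order.

import numpy as np
from scipy.spatial.transform import Rotation, Slerp

def interpolate_quat(curr_quat, next_quat, t_frac):
    # Spherical interpolation between two orientations, t_frac in [0, 1].
    key_rots = Rotation.from_quat(np.stack([curr_quat, next_quat]))
    slerp = Slerp([0.0, 1.0], key_rots)
    return slerp([t_frac]).as_quat()[0]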
44 changes: 44 additions & 0 deletions examples/test_ik.py
@@ -0,0 +1,44 @@
import gymnasium as gym
import numpy as np

import gym_lowcostrobot # noqa


def displace_object(square_size=0.15, invert_y=False, origin_pos=(0.0, 0.1, 0.01)):
# Sample a position in a square in front of the robot.
# origin_pos is (x, y, z); origin_pos[2] is used as the cube height below.
if not invert_y:
x = np.random.uniform(origin_pos[0] - square_size / 2, origin_pos[0] + square_size / 2)
y = np.random.uniform(origin_pos[1] - square_size / 2, origin_pos[1] + square_size / 2)
else:
x = np.random.uniform(origin_pos[0] + square_size / 2, origin_pos[0] - square_size / 2)
y = np.random.uniform(origin_pos[1] + square_size / 2, origin_pos[1] - square_size / 2)
env.data.qpos[:3] = np.array([x, y, origin_pos[2]])
return env.data.qpos[:3]


# Create the environment
env = gym.make("ReachCube-v0", render_mode="human", action_mode="ee")

# Reset the environment
observation, info = env.reset()
cube_origin_pos = env.data.qpos[:3].astype(np.float32)
for i in range(10000):
if i % 500 == 0:
cube_pos = displace_object(square_size=0.2, invert_y=False, origin_pos=cube_origin_pos)
# Sample random action
action = env.action_space.sample()
ee_id = env.model.body("moving_side").id
ee_pos = env.data.xpos[ee_id].astype(np.float32) # default [0.03390873 0.22571199 0.14506643]
action[:3] = cube_pos + [0, 0, 0.1] - ee_pos
# action[:3] = [0.03390873, 0.22571199, 0.14506643]
action[3] = -1.5

# Step the environment
observation, reward, terminated, truncated, info = env.step(action)

# Reset the environment if it's done
if terminated or truncated:
observation, info = env.reset()

# Close the environment
env.close()
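One more observation: displace_object is duplicated between examples/mujoco_lift_scripted_policy.py and this file, and the copy here reads the module-level env instead of taking it as a parameter. If more examples are added, it might be worth moving the helper into a small shared module that always takes the environment explicitly; a sketch of what that could look like (the file name examples/utils.py is an assumption):

import numpy as np

def displace_object(env, square_size=0.15, invert_y=False, origin_pos=(0.0, 0.1, 0.01)):
    # Sample a cube position uniformly in a square centred on origin_pos[:2]
    # and write it into the simulation state, keeping origin_pos[2] as the height.
    half = square_size / 2
    sign = -1.0 if invert_y else 1.0
    x = np.random.uniform(origin_pos[0] - sign * half, origin_pos[0] + sign * half)
    y = np.random.uniform(origin_pos[1] - sign * half, origin_pos[1] + sign * half)
    env.unwrapped.data.qpos[:3] = np.array([x, y, origin_pos[2]])
    return env.unwrapped.data.qpos[:3].copy()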
2 changes: 1 addition & 1 deletion gym_lowcostrobot/__init__.py
@@ -9,7 +9,7 @@
register(
id="LiftCube-v0",
entry_point="gym_lowcostrobot.envs:LiftCubeEnv",
max_episode_steps=500,
max_episode_steps=1500,
)

register(
6 changes: 3 additions & 3 deletions gym_lowcostrobot/assets/low_cost_robot_6dof/lift_cube.xml
@@ -1,7 +1,7 @@
<mujoco model="bd1 scene">
<option timestep="0.005"/>
<option timestep="0.005" o_solimp=".89 .91 .01 0.5 2" o_solref=".01 1" />

<compiler angle="radian" autolimits="true"/>
<compiler angle="radian" autolimits="true" convexhull="false"/>

<asset>
<mesh name="base" file="base.stl"/>
@@ -38,7 +38,7 @@
<body name="cube" pos="0.1 0.1 0.01">
<freejoint name="cube"/>
<inertial pos="0 0 0" mass="0.1" diaginertia="0.00001125 0.00001125 0.00001125"/>
<geom friction="0.5" condim="3" pos="0 0 0" size="0.015 0.015 0.015" type="box" name="cube" rgba="0.5 0 0 1" priority="1"/>
<geom friction="2.0 0.8 0.8" condim="4" pos="0 0 0" size="0.01 0.01 0.01" type="box" name="cube" rgba="0.5 0 0 1" priority="1"/>
</body>

