Commit 82331a4

fix planar mdp
1 parent b09ec41 commit 82331a4

34 files changed: +62 −31 lines

.gitignore
File mode changed: 100644 → 100755

.gitmodules
File mode changed: 100644 → 100755

LICENSE
File mode changed: 100644 → 100755

README.md
File mode changed: 100644 → 100755

data/sample_planar.py
File mode changed: 100644 → 100755

data/sample_pole.py
File mode changed: 100644 → 100755

datasets.py
File mode changed: 100644 → 100755

ilqr.py
100644 → 100755
+36 −27
@@ -9,7 +9,7 @@
 from mdp.cartpole_mdp import CartPoleMDP
 from ilqr_utils import *
 
-seed = 2
+seed = 2020
 random.seed(seed)
 os.environ['PYTHONHASHSEED'] = str(seed)
 np.random.seed(seed)
@@ -21,14 +21,16 @@
 torch.set_default_dtype(torch.float64)
 
 config_path = {'plane': 'ilqr_config/plane.json', 'swing': 'ilqr_config/swing.json', 'balance': 'ilqr_config/balance.json', 'cartpole': 'ilqr_config/cartpole.json'}
-env_task = {'planar': ['plane'], 'pendulum': ['swing', 'balance'], 'cartpole': ['cartpole']}
+env_task = {'planar': ['plane'], 'pendulum': ['balance', 'swing'], 'cartpole': ['cartpole']}
 env_data_dim = {'planar': (1600, 2, 2), 'pendulum': ((2,48,48), 3, 1), 'cartpole': ((2,80,80), 8, 1)}
 
+
 def main(args):
     env_name = args.env
     assert env_name in ['planar', 'pendulum', 'cartpole']
     possible_tasks = env_task[env_name]
     epoch = args.epoch
+    x_dim, z_dim, u_dim = env_data_dim[env_name]
 
     ilqr_result_path = 'iLQR_result/' + env_name
     if not os.path.exists(ilqr_result_path):
@@ -38,9 +40,23 @@ def main(args):
 
     # each trained model will perform 10 random tasks
     random_task_id = np.random.choice(len(possible_tasks), size=10)
-    x_dim, z_dim, u_dim = env_data_dim[env_name]
+    all_task_configs = []
     if env_name in ['planar', 'pendulum']:
         x_dim = np.prod(x_dim)
+    for task_counter in range(len(random_task_id)):
+        # pick a random task
+        random_task = possible_tasks[random_task_id[task_counter]]
+        # config for this task
+        with open(config_path[random_task]) as f:
+            config = json.load(f)
+
+        # sample random start and goal state
+        s_start_min, s_start_max = config['start_min'], config['start_max']
+        config['s_start'] = np.random.uniform(low=s_start_min, high=s_start_max)
+        s_goal = config['goal'][np.random.choice(len(config['goal']))]
+        config['s_goal'] = np.array(s_goal)
+
+        all_task_configs.append(config)
 
     # the folder where all trained models are saved
     folder = 'result/' + env_name
@@ -70,12 +86,9 @@ def main(args):
 
         # run the task with 10 different start and goal states for a particular model
         avg_percent = 0.0
-        for task_counter in range(10):
-            # pick a random task
-            random_task = possible_tasks[random_task_id[task_counter]]
-            with open(config_path[random_task]) as f:
-                config = json.load(f)
-            print('Performing task: ' + str(random_task))
+        for task_counter, config in enumerate(all_task_configs):
+
+            print('Performing task %d: ' %(task_counter) + str(config['task']))
 
             # environment specification
             horizon = config['horizon_prob']
@@ -94,11 +107,8 @@ def main(args):
             alpha_mult = config['alpha_mult']
             alpha_min = config['alpha_min']
 
-            # sample random start and goal state
-            s_start_min, s_start_max = config['start_min'], config['start_max']
-            s_start = np.random.uniform(low=s_start_min, high=s_start_max)
-            s_goal = config['goal'][np.random.choice(len(config['goal']))]
-            s_goal = np.array(s_goal)
+            s_start = config['s_start']
+            s_goal = config['s_goal']
 
             # mdp
             if env_name == 'planar':
@@ -107,8 +117,6 @@ def main(args):
             elif env_name == 'pendulum':
                 mdp = PendulumMDP(frequency=config['frequency'],
                                   noise=config['noise'], torque=config['torque'])
-            elif env_name == 'cartpole':
-                mdp = CartPoleMDP(frequency=config['frequency'], noise=config['noise'])
             # get z_start and z_goal
             x_start = get_x_data(mdp, s_start, config)
             x_goal = get_x_data(mdp, s_goal, config)
@@ -145,6 +153,9 @@ def main(args):
                 accept = False # if any alpha is accepted
                 while alpha > alpha_min:
                     z_seq_cand, u_seq_cand = forward(z_seq, all_actions_trajs[traj_id], k_small, K_big, dynamics, alpha)
+                    # u_seq_cand = forward(all_actions_trajs[traj_id], k_small, K_big, A_seq, B_seq, alpha)
+                    # z_seq_cand = compute_latent_traj(z_start_horizon, u_seq_cand, dynamics)
+                    # cost_cand = latent_cost(R_z, R_u, z_seq_cand, z_goal, u_seq_cand)
                     cost_cand = latent_cost(R_z, R_u, z_seq_cand, z_goal, u_seq_cand)
                     if cost_cand < current_cost: # accept the trajectory candidate
                         accept = True
@@ -165,11 +176,9 @@ def main(args):
                     latent_cost_list[i] = np.inf
                 traj_opt_id = np.argmin(latent_cost_list)
                 action_chosen = all_actions_trajs[traj_opt_id][0]
-                # action_chosen = np.clip(action_chosen, mdp.action_range[0], mdp.action_range[1])
                 actions_final.append(action_chosen)
                 s_start_horizon, z_start_horizon = update_horizon_start(mdp, s_start_horizon,
                                                                         action_chosen, encoder, config)
-                # check if task fails
                 # if mdp.is_fail(s_start_horizon):
                 # break
                 all_actions_trajs = refresh_actions_trajs(all_actions_trajs, traj_opt_id, mdp,
@@ -178,19 +187,20 @@ def main(args):
 
             obs_traj, goal_counter = traj_opt_actions(s_start, actions_final, mdp)
             # compute the percentage close to goal
-            percent = goal_counter / horizon
-            print('Success rate: %.2f' % (percent))
-            avg_percent += percent
+            success_rate = goal_counter / horizon
+            print ('Success rate: %.2f' % (success_rate))
+            percent = success_rate
+            avg_percent += success_rate
             with open(model_path + '/result.txt', 'a+') as f:
-                f.write(random_task + ': ' + str(percent) + '\n')
+                f.write(config['task'] + ': ' + str(percent) + '\n')
 
             # save trajectory as gif file
             gif_path = model_path + '/task_{:01d}.gif'.format(task_counter + 1)
-            save_traj(obs_traj, mdp.render(s_goal).squeeze(), gif_path, random_task)
+            save_traj(obs_traj, mdp.render(s_goal).squeeze(), gif_path, config['task'])
 
         avg_percent = avg_percent / 10
-        print('Average success rate: ' + str(avg_percent))
-        print("====================================")
+        print ('Average success rate: ' + str(avg_percent))
+        print ("====================================")
         avg_model_percent += avg_percent
         if avg_percent > best_model_percent:
             best_model = log_base
@@ -203,11 +213,10 @@ def main(args):
         f.write('Average percentage of all models: ' + str(avg_model_percent) + '\n')
         f.write('Best model: ' + best_model + ', best percentage: ' + str(best_model_percent))
 
-
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='run iLQR')
     parser.add_argument('--env', required=True, type=str, help='environment to perform')
     parser.add_argument('--epoch', required=True, type=str, help='number of epochs to load model')
     args = parser.parse_args()
 
-    main(args)
+    main(args)
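The net effect of this change is that the ten start/goal pairs are sampled once, up front, and stored in each task's config, so every trained model appears to be evaluated on the same set of tasks instead of re-sampling them inside the per-model loop. One detail the new sampling code relies on is that np.random.uniform broadcasts array-valued low/high bounds and returns one sample per state dimension. A minimal sketch of that behaviour (the bounds below are made up for illustration; the real start_min/start_max values come from the JSON files in ilqr_config/):

    import numpy as np

    # Hypothetical planar bounds; real values are read from ilqr_config/plane.json.
    start_min = [2.0, 2.0]
    start_max = [38.0, 38.0]

    # One uniform sample per state dimension.
    s_start = np.random.uniform(low=start_min, high=start_max)
    print(s_start.shape)  # (2,)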

ilqr_config/balance.json
File mode changed: 100644 → 100755

ilqr_config/cartpole.json
File mode changed: 100644 → 100755

ilqr_config/plane.json
100644 → 100755
+2 −2
@@ -22,8 +22,8 @@
   "ilqr_iters": 4,
   "horizon_prob": 40,
   "plan_len": 10,
-  "uniform_trajs": 1,
-  "extreme_trajs": 0,
+  "uniform_trajs": 3,
+  "extreme_trajs": 3,
 
   "obs_shape": [40, 40],
   "action_dim": 2,

ilqr_config/swing.json
File mode changed: 100644 → 100755

ilqr_utils.py
100644 → 100755
+17
@@ -95,6 +95,23 @@ def forward(z_seq, u_seq, k, K, dynamics, alpha):
         z_seq_new.append(z_new.squeeze().numpy())
     return np.array(z_seq_new), np.array(u_seq_new)
 
+# def forward(u_seq, k_seq, K_seq, A_seq, B_seq, alpha):
+#     """
+#     update the trajectory, given k and K
+#     !!!! update using the linearization matricies (A and B), not the learned dynamics
+#     """
+#     u_new_seq = []
+#     plan_len = len(u_seq)
+#     z_dim = K_seq[0].shape[1]
+#     for i in range(0, plan_len):
+#         if i == 0:
+#             z_delta = np.zeros(z_dim)
+#         else:
+#             z_delta = np.matmul(A_seq[i-1], z_delta) + np.matmul(B_seq[i-1], u_delta)
+#         u_delta = alpha * (k_seq[i] + np.matmul(K_seq[i], z_delta))
+#         u_new_seq.append(u_seq[i] + u_delta)
+#     return np.array(u_new_seq)
+
 def get_x_data(mdp, state, config):
     image_data = mdp.render(state).squeeze()
     x_dim = config['obs_shape']
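The commented-out alternative above rolls the iLQR control update forward through the fitted linearization matrices (A_t, B_t) rather than through the learned dynamics network. A standalone, uncommented sketch of the same idea (the function name and array shapes here are assumed for illustration, not taken from the repo):

    import numpy as np

    def forward_linearized(u_seq, k_seq, K_seq, A_seq, B_seq, alpha):
        # Propagate the latent perturbation through the local linear model
        # z_delta_{t+1} = A_t z_delta_t + B_t u_delta_t, and apply the iLQR
        # control law u_delta_t = alpha * (k_t + K_t z_delta_t).
        u_new_seq = []
        z_delta = np.zeros(K_seq[0].shape[1])
        u_delta = np.zeros_like(np.asarray(u_seq[0], dtype=float))
        for i in range(len(u_seq)):
            if i > 0:
                z_delta = A_seq[i - 1] @ z_delta + B_seq[i - 1] @ u_delta
            u_delta = alpha * (k_seq[i] + K_seq[i] @ z_delta)
            u_new_seq.append(u_seq[i] + u_delta)
        return np.array(u_new_seq)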

latent_map_pendulum.py
File mode changed: 100644 → 100755

latent_map_planar.py
100644 → 100755
+3
@@ -71,6 +71,9 @@ def draw_latent_map(model, mdp):
         all_z.append(np.copy(z))
     all_z = np.array(all_z)
 
+    avg_norm_2 = np.mean(np.sum(all_z ** 2, axis=1))
+    print('avg norm 2: ' + str(avg_norm_2))
+
     # normalize and scale to plot
     z_min = np.min(all_z, axis = 0)
     all_z = np.round(20 * (all_z - z_min) + 30).astype(np.int)
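The added diagnostic is just the mean squared L2 norm of the latent codes. Equivalently, as a small self-contained check (an (N, z_dim) shape for all_z is assumed here):

    import numpy as np

    all_z = np.random.randn(100, 2)  # placeholder latent codes, shape (N, z_dim)
    avg_norm_2 = np.mean(np.sum(all_z ** 2, axis=1))
    # The same quantity via np.linalg.norm:
    assert np.isclose(avg_norm_2, np.mean(np.linalg.norm(all_z, axis=1) ** 2))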

losses.py
File mode changed: 100644 → 100755

mdp/cartpole_mdp.py
File mode changed: 100644 → 100755

mdp/common.py
File mode changed: 100644 → 100755

mdp/pendulum_mdp.py
File mode changed: 100644 → 100755

mdp/plane_obstacles_mdp.py
100644 → 100755
+4 −2
@@ -16,6 +16,7 @@ class PlanarObstaclesMDP(object):
     def __init__(self, rw_rendered=1, max_step=3,
                  goal=[37,37], goal_thres=2, noise = 0):
         self.rw_rendered = rw_rendered
+        self.max_step = max_step
         self.action_range = np.array([-max_step, max_step])
         self.goal = goal
         self.goal_thres = goal_thres
@@ -35,9 +36,10 @@ def is_valid_state(self, s):
                 return False
         return True
 
-    def take_step(self, s, u): # compute the next state given the current state and action
+    def take_step(self, s, u, anneal_ratio=0.9): # compute the next state given the current state and action
         u = np.clip(u, self.action_range[0], self.action_range[1])
-        s_next = s + u # the true dynamics
+
+        s_next = np.clip(s + u, self.position_range[0], self.position_range[1])
         if not self.is_valid_state(s_next):
             return s
         return s_next
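With this change, take_step appears to clip the action to action_range and the resulting position to position_range before the validity check, so the agent can no longer step off the plane; if the clipped next state still collides with an obstacle, the current state is returned unchanged. The new anneal_ratio parameter is added to the signature but is not used in the lines shown here. A small illustration with made-up values (position_range and the states below are assumed for the example, not read from the repo):

    import numpy as np

    action_range = np.array([-3, 3])      # [-max_step, max_step] with max_step=3
    position_range = np.array([0, 40])    # assumed bounds of the 40x40 plane

    s = np.array([39.0, 20.0])
    u = np.array([5.0, 0.0])

    u = np.clip(u, action_range[0], action_range[1])                # -> [3., 0.]
    s_next = np.clip(s + u, position_range[0], position_range[1])   # -> [40., 20.]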

mdp/pole_base.py
File mode changed: 100644 → 100755

networks.py
File mode changed: 100644 → 100755

pcc.yml
File mode changed: 100644 → 100755

pcc_model.py
File mode changed: 100644 → 100755

sample_results/cartpole_1.gif
File mode changed: 100644 → 100755

sample_results/cartpole_2.gif
File mode changed: 100644 → 100755

sample_results/latent_map_pend.png
File mode changed: 100644 → 100755

sample_results/latent_map_sample.png
File mode changed: 100644 → 100755

sample_results/pendulum_1.gif
File mode changed: 100644 → 100755

sample_results/pendulum_2.gif
File mode changed: 100644 → 100755

sample_results/planar_1.gif
File mode changed: 100644 → 100755

sample_results/planar_2.gif
File mode changed: 100644 → 100755

train_pcc.py
File mode changed: 100644 → 100755

true_map.png
File mode changed: 100644 → 100755

0 commit comments
