|
7 | 7 | metric="charts/episodic_return",
|
8 | 8 | metric_last_n_average_window=50,
|
9 | 9 | direction="maximize",
|
| 10 | + aggregation_type="average", |
10 | 11 | target_scores={
|
11 | 12 | "CartPole-v1": [0, 500],
|
12 | 13 | "Acrobot-v1": [-500, 0],
|
13 | 14 | },
|
14 | 15 | params_fn=lambda trial: {
|
15 | 16 | "learning-rate": trial.suggest_loguniform("learning-rate", 0.0003, 0.003),
|
16 | 17 | "num-minibatches": trial.suggest_categorical("num-minibatches", [1, 2, 4]),
|
17 |
| - "update-epochs": trial.suggest_categorical("update-epochs", [1, 2, 4]), |
| 18 | + "update-epochs": trial.suggest_categorical("update-epochs", [1, 2, 4, 8]), |
18 | 19 | "num-steps": trial.suggest_categorical("num-steps", [5, 16, 32, 64, 128]),
|
19 | 20 | "vf-coef": trial.suggest_uniform("vf-coef", 0, 5),
|
20 | 21 | "max-grad-norm": trial.suggest_uniform("max-grad-norm", 0, 5),
|
21 |
| - "total-timesteps": 10000, |
| 22 | + "total-timesteps": 100000, |
22 | 23 | "num-envs": 16,
|
23 | 24 | },
|
24 | 25 | pruner=optuna.pruners.MedianPruner(n_startup_trials=5),
|
25 | 26 | sampler=optuna.samplers.TPESampler(),
|
26 |
| - # wandb_kwargs={"project": "cleanrl"}, |
27 | 27 | )
|
28 | 28 | tuner.tune(
|
29 |
| - num_trials=10, |
| 29 | + num_trials=100, |
30 | 30 | num_seeds=3,
|
31 | 31 | )
|
0 commit comments