main.py
# -*- coding: utf-8 -*-
"""
Created on Sun Feb 28 11:02:18 2021
@author: Leon Jovanovic
"""
import gym
from agent import Agent
import atari_wrappers
from torch.utils.tensorboard import SummaryWriter
import time
# ---------------------------------Parameters----------------------------------
DQN_HYPERPARAMS = {
    'eps_start': 1,
    'eps_end': 0.02,
    'eps_decay': 10 ** 5,
    'buffer_size': 15000,
    'buffer_minimum': 10001,
    'learning_rate': 5e-5,
    'gamma': 0.99,
    'n_iter_update_nn': 1000,
    'multi_step': 2,
    'double_dqn': True,
    'dueling': False
}
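# The three eps_* parameters above suggest a schedule that anneals epsilon from
# eps_start to eps_end over eps_decay frames. The actual schedule lives in
# agent.py; the helper below is only a hedged sketch of a linear decay, and its
# name and signature are assumptions for illustration.
def _eps_by_frame(frame, start=1.0, end=0.02, decay=10 ** 5):
    # Linear anneal: reach `end` after `decay` frames, then stay there.
    return max(end, start - (start - end) * frame / decay)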
ENV_NAME = "PongNoFrameskip-v4"
RECORD = True
MAX_GAMES = 500
DEVICE = 'cuda'
BATCH_SIZE = 32
# For TensorBoard
SUMMARY_WRITER = True
LOG_DIR = 'content/runs'
name = 'DQN Multi-step=%d,Double=%r,Dueling=%r' % (DQN_HYPERPARAMS['multi_step'], DQN_HYPERPARAMS['double_dqn'], DQN_HYPERPARAMS['dueling'])
# For Telegram
TG_BOT = True
# ------------------------Create environment and agent-------------------------
env = atari_wrappers.make_env(ENV_NAME)  # plain gym.make(ENV_NAME) would skip the Atari preprocessing wrappers
# Record a few selected episodes; force=True overwrites earlier recordings
if RECORD:
    env = gym.wrappers.Monitor(env, "main-" + ENV_NAME, force=True)
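# Note: gym.wrappers.Monitor was removed in newer Gym releases; there,
# gym.wrappers.RecordVideo is the closest replacement. This script assumes an
# older Gym where Monitor is still available.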
obs = env.reset()
# Create the TensorBoard writer used for logging training curves
writer = SummaryWriter(log_dir=LOG_DIR + '/' + name + str(time.time())) if SUMMARY_WRITER else None
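# The agent presumably logs training metrics through this writer; an
# illustrative call (the tag name is an assumption, not the project's):
# writer.add_scalar('mean_reward', mean_reward, num_games)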
# Create agent that will learn
agent = Agent(env, hyperparameters=DQN_HYPERPARAMS, device=DEVICE, writer=writer, max_games=MAX_GAMES, tg_bot=TG_BOT)
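# Since double_dqn=True and multi_step=2, agent.sample_and_improve (called in
# the loop below) presumably regresses Q(s, a) toward a multi-step double-DQN
# target. The function below is a hedged sketch of that target, not the
# project's code; tensor names and shapes are illustrative assumptions.
def _double_dqn_target(rewards, dones, next_obs, online_net, target_net, gamma, n_steps):
    # Double DQN: the online net chooses the next action, the target net scores it.
    next_actions = online_net(next_obs).argmax(dim=1, keepdim=True)
    next_q = target_net(next_obs).gather(1, next_actions).squeeze(1)
    # `rewards` is assumed to already hold the discounted n-step reward sum,
    # so the bootstrap is discounted by gamma ** n_steps and masked on done.
    return rewards + (gamma ** n_steps) * next_q * (1.0 - dones)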
# --------------------------------Learning-------------------------------------
num_games = 0
while num_games < MAX_GAMES:
    # Select an action a in state s with the e-greedy policy
    action = agent.select_eps_greedy_action(obs)
    # Take that action and observe s', r and done
    new_obs, reward, done, _ = env.step(action)
    # Add (s, a, s', r, done) to buffer B (n-step folding sketched after the loop)
    agent.add_to_buffer(obs, action, new_obs, reward, done)
    # Once B is large enough, sample a mini-batch from it and use it to improve
    # the NN value-function approximation (target sketched above); skip until then
    agent.sample_and_improve(BATCH_SIZE)
    obs = new_obs
    if done:
        num_games += 1
        agent.print_info()
        agent.reset_parameters()
        obs = env.reset()
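# With multi_step=2, agent.add_to_buffer presumably folds consecutive
# transitions into one 2-step transition before storing it. A hedged sketch of
# that folding; the helper name and tuple layout are assumptions, not the
# project's code:
def _fold_n_step(step_buffer, gamma):
    # `step_buffer` holds the last n (obs, action, reward, next_obs, done)
    # tuples. The folded transition keeps the first obs/action, sums the
    # discounted rewards, and takes next_obs/done from the final step.
    obs, action = step_buffer[0][0], step_buffer[0][1]
    reward = sum((gamma ** i) * t[2] for i, t in enumerate(step_buffer))
    next_obs, done = step_buffer[-1][3], step_buffer[-1][4]
    return obs, action, reward, next_obs, done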
if writer is not None:
    writer.close()
env.close()  # also finalizes the Monitor recording if RECORD is set
# !tensorboard --logdir="D:\Users\Leon Jovanovic\Documents\Reinforcement Learning\reinforcement-learning-atari-pong\content\runs" --host=127.0.0.1