# agent_parser_configs.yaml
# dummy testing agents
# NOTE(review): no option entries are visible for `random`; it appears to be an
# agent section with an empty (null) value — confirm against the consumer.
random:
# Options for the `single` agent: one integer option selecting the action the
# agent will choose.
single:
  action:
    alias: a
    type: int
    default: 0
    help: "Which action the agent will choose (default: 0)"
# standard RL agents
# Options for the `tabular-q` agent. Each option below is anchored so that
# other agent sections in this file can alias the same definition.
tabular-q:
  # Required float; anchored as &learnrate.
  lr: &learnrate
    alias: l
    type: float
    required: true
    help: "Learning rate (required)"

  # Optional float; anchored as &epsilon.
  epsilon: &epsilon
    alias: e
    type: float
    default: 0.01
    help: "Exploration constant for epsilon greedy policy (default: .01)"

  # Optional int; anchored as &epsilon-anneal.
  epsilon-anneal: &epsilon-anneal
    alias: dl
    type: int
    default: 100000
    help: "Number of timesteps to linearly anneal epsilon exploration (default: 100,000)"
# Options for the `deep-q` agent.
deep-q:
  # Aliases of the option definitions anchored under `tabular-q`.
  lr: *learnrate
  epsilon: *epsilon
  epsilon-anneal: *epsilon-anneal

  # Options declared only for this agent (not anchored).
  replay-capacity:
    alias: r
    type: int
    default: 10000
    help: "Capacity of replay buffer (default: 10000)"
  sync-every:
    alias: s
    type: int
    default: 10000
    help: "Number of timesteps to wait before syncing target network (default: 10000)"

  # Options declared here and anchored so later agent sections can alias them.
  n-layers: &layers
    alias: ls
    type: int
    default: 2
    help: "Number (non-input) layers for Q network (default: 2)"
  n-hidden: &hidden
    alias: hd
    type: int
    default: 100
    help: "Number of neurons per hidden layer (default: 100)"
  batch-size: &batchsize
    alias: b
    type: int
    default: 64
    help: "Batch size for model training (default: 64)"
  device: &device
    alias: dv
    type: int
    default: 0
    help: "If using CUDA, which device to use for training (default: 0)"
  # Flag-style option: declares `action: store_true` and no `type`/`default`.
  log-gradients: &gradlog
    alias: lg
    action: store_true
    help: "Log gradients to a Tensorboard histogram (default: False)"
# Options for the `ppo-mlp` agent. Options introduced here are anchored so the
# other PPO agent sections can alias them.
ppo-mlp:
  # Alias of the learning-rate definition anchored under `tabular-q`.
  lr: *learnrate

  rollouts: &rollouts
    alias: r
    type: int
    required: true
    help: "Number of rollouts for generating training data (required)"

  # Currently commented out:
  # horizon:
  #   alias: hz
  #   type: int
  #   required: true
  #   help: "Rollout horizon (required)"

  epochs: &epochs
    alias: e
    type: int
    required: true
    help: "Training epochs (required)"

  # Alias of the batch-size definition anchored under `deep-q`.
  batch-size: *batchsize

  clipping: &clipping
    alias: c
    type: float
    default: 0.2
    help: "Epsilon constant for clipping in surrogate loss (default: 0.2)"
  critic-coeff: &critic_coeff
    alias: cc
    type: float
    default: 1.0
    help: "Scaling coefficient of critic loss; should be in (0,1] (default: 1)"

  # Currently commented out:
  # gae-coeff:
  #   alias: g
  #   type: float
  #   default: .95
  #   help: "Generalized advantage estimation coefficient (default: .95)"

  entropy-bonus: &entropy_bonus
    alias: eb
    type: float
    default: 0.01
    help: "Entropy bonus exploration coefficient (default: 0.01)"

  # Aliases of the network/device/logging definitions anchored under `deep-q`.
  n-layers: *layers
  n-hidden: *hidden
  device: *device
  log-gradients: *gradlog
# Options for the `ppo-cnn` agent.
ppo-cnn:
  # The only option declared in this section; anchored as &n_channels.
  n-channels: &n_channels
    alias: ch
    type: int
    default: 5
    help: "Number of channels for the convolutional layers (default: 5)"

  # Everything below aliases definitions anchored in earlier agent sections.
  lr: *learnrate
  rollouts: *rollouts
  epochs: *epochs
  batch-size: *batchsize
  clipping: *clipping
  critic-coeff: *critic_coeff
  entropy-bonus: *entropy_bonus
  n-layers: *layers
  device: *device
  log-gradients: *gradlog
# (purportedly) safe RL agents
# Options for the `ppo-crmdp` agent. Every option is an alias of a definition
# anchored in an earlier agent section; the option names listed here are the
# same ones listed under `ppo-cnn`.
ppo-crmdp:
  n-channels: *n_channels
  lr: *learnrate
  rollouts: *rollouts
  epochs: *epochs
  batch-size: *batchsize
  clipping: *clipping
  critic-coeff: *critic_coeff
  entropy-bonus: *entropy_bonus
  n-layers: *layers
  device: *device
  log-gradients: *gradlog
# Options for the `tabular-ssq` agent.
tabular-ssq:
  # Aliases of the option definitions anchored under `tabular-q`.
  lr: *learnrate
  epsilon: *epsilon
  epsilon-anneal: *epsilon-anneal

  budget:
    alias: b
    type: int
    # Canonical boolean, matching every other `required: true` in this file.
    # A bare `y` is a boolean only under some YAML 1.1 loaders and a plain
    # string otherwise.
    required: true
    help: "Max number of queries of H for supervision in SSRL (required)"

  # Float defaults below are written with a leading zero, matching the other
  # float defaults in this file; the numeric values are unchanged.
  warmup:
    alias: w
    type: float
    default: 0.5
    help: "Proportion of budget to spend during warmup phase (default: .5)"
  fuzzy-query:
    alias: f
    type: float
    default: 1.0
    help: "Probability of querying H while online if budget and delta conditions are met (default: 1.0)"
  delta:
    alias: dt
    type: float
    default: 0.9
    help: "Minimum quantile of episode returns for allowing to query H (default: .9)"
  # NOTE(review): this key is the only one here with an uppercase letter; left
  # as-is because consumers may look it up by this exact name.
  C-prior:
    alias: p
    type: float
    default: 0.01
    help: "Prior for state corruption (default: .01)"