forked from shenyann/CATE
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparameterLoader.py
93 lines (78 loc) · 3.85 KB
/
parameterLoader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import argparse
from argparse import Namespace
def argLoader():
parser = argparse.ArgumentParser()
parser.add_argument('--corruption_rate', type=float, default=0.2)
# Network Settings
# Overall
parser.add_argument('--dropout', type=float, default=0.1)
parser.add_argument('--n_vocab', type=int, default=5, help='5 | 11')
parser.add_argument('--PAD', type=int, default=0)
parser.add_argument('--MASK', type=int, default=1)
# Graph Encoder
parser.add_argument('--graph_n_layers', type=int, default=12)
parser.add_argument('--graph_d_model', type=int, default=64)
parser.add_argument('--graph_n_head', type=int, default=8)
parser.add_argument('--graph_d_ff', type=int, default=128)
parser.add_argument('--graph_dropout', type=float, default=0.1)
# Cross_Attention
parser.add_argument('--pair_n_layers', type=int, default=24)
parser.add_argument('--pair_d_model', type=int, default=64)
parser.add_argument('--pair_n_head', type=int, default=8)
parser.add_argument('--pair_d_ff', type=int, default=256)
parser.add_argument('--pair_dropout', type=float, default=0.1)
# CLS
parser.add_argument('--cls_dropout', type=float, default=0.1)
# Tied weight
parser.add_argument('--tied_weights', action='store_true')
# Device
parser.add_argument('--parallel', action='store_true')
parser.add_argument('--device', type=int, default=0)
parser.add_argument('--n_workers', type=int, default=4)
# Data Loading
parser.add_argument('--dataset', type=str, default="data/darts/")
parser.add_argument('--train_data', type=str, default="data/darts/train.pt")
parser.add_argument('--train_pair', type=str, default="data/darts/train_pairs.pt")
parser.add_argument('--valid_data', type=str, default="data/darts/test.pt")
parser.add_argument('--valid_pair', type=str, default="data/darts/test_pairs.pt")
parser.add_argument('--mini', action="store_true")
# Optimizer
parser.add_argument('--learning_rate', type=float, default=1e-3)
parser.add_argument('--weight_decay', type=float, default=0.01)
parser.add_argument('--adam_epsilon', type=float, default=1e-8)
parser.add_argument('--warmup_steps', type=int, default=100)
# Loss Parameters
parser.add_argument('--label_smoothing', type=float, default=0.1)
# Training Parameters
parser.add_argument("--seed", type=int, default=4, help="random seed")
parser.add_argument('--batch_size', type=int, default=1024)
parser.add_argument('--max_epoch', type=int, default=10)
parser.add_argument('--check_point_min', type=int, default=0)
parser.add_argument('--check_point_freq', type=int, default=100)
parser.add_argument('--save_path', type=str, default='model/')
parser.add_argument('--not_save_each_epoch', action='store_true')
parser.add_argument('--pretrained_path', type=str, default='model/')
# Operations
parser.add_argument('--do_train', action='store_true')
args = parser.parse_args()
args.save_each_epoch = not args.not_save_each_epoch
args.graph_encoder = Namespace()
args.graph_encoder.n_layers = args.graph_n_layers
args.graph_encoder.d_model = args.graph_d_model
args.graph_encoder.n_head = args.graph_n_head
args.graph_encoder.d_ff = args.graph_d_ff
args.graph_encoder.dropout = args.graph_dropout
args.graph_encoder.n_vocab = args.n_vocab
args.cross_attention = Namespace()
args.cross_attention.n_layers = args.pair_n_layers
args.cross_attention.d_model = args.pair_d_model
args.cross_attention.n_head = args.pair_n_head
args.cross_attention.d_ff = args.pair_d_ff
args.cross_attention.n_token_type = 2
args.cross_attention.dropout = args.pair_dropout
args.cls = Namespace()
args.cls.d_model = args.pair_d_model
args.cls.n_vocab = args.n_vocab
args.cls.dropout = args.cls_dropout
print(args)
return args