# Copyright (c) 2022 Graphcore Ltd. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import time
import argparse
import datetime
import numpy as np
import torch
import torch.nn as nn
from config import get_config
from models.build import build_pipeline as build_model
from timm.loss import LabelSmoothingCrossEntropy, SoftTargetCrossEntropy
from optimizer import build_optimizer
from options import get_options
import poptorch
from collections import OrderedDict
from torchvision import datasets, transforms
from torch.utils.data import Dataset
from lr_scheduler import build_scheduler
from timm.models import resume_checkpoint
from dataset.ipu_mixup import Mixup
from pathlib import Path
import unittest
import pytest
import ctypes
import subprocess
swin_root_path = str(Path(__file__).parent)
sys.path.append(swin_root_path)
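

# Thin ImageFolder wrapper that returns the (image, label) pair unchanged.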
class ReturnIndexDataset(datasets.ImageFolder):
    def __getitem__(self, idx):
        img, lab = super(ReturnIndexDataset, self).__getitem__(idx)
        return img, lab
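

# Command-line options mirroring the main training script; the test relies on
# the defaults (notably the SWIN_LARGE_224_22K_FINETUNE_1K_FP16_POD16 config).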
def parse_option():
    parser = argparse.ArgumentParser("Swin Transformer training and evaluation script", add_help=False)
    parser.add_argument(
        "--cfg",
        default="SWIN_LARGE_224_22K_FINETUNE_1K_FP16_POD16",
        type=str,
        metavar="PATH",
        help="path to config file",
    )
    parser.add_argument("--batch-size", type=int, help="batch size for a single device")
    parser.add_argument("--num-workers", type=int, default=8, help="number of dataloader workers")
    parser.add_argument("--weights", type=str, help="weights for model")
    parser.add_argument("--device", type=str, default="", choices=["cpu", "ipu", "gpu"])
    parser.add_argument("--alignment", action="store_true", help="if alignment fwd or bwd")
    parser.add_argument("--half", default=True, action="store_true", help="use half")
    parser.add_argument(
        "--resume",
        default="",
        type=str,
        metavar="PATH",
        help="Resume full model and optimizer state from checkpoint (default: none)",
    )
    parser.add_argument("--data-path", type=str, metavar="FILE", help="path to dataset")
    parser.add_argument("--checkpoint-output-dir", type=str, metavar="FILE", help="path to save output files")
    parser.add_argument("--pretrained-model", type=str, help="path to init checkpoint when fine-tuning models")
    parser.add_argument("--ga", type=int, help="Gradient accumulation steps")
    parser.add_argument("--amp", type=float, help="Available memory proportion")
    parser.add_argument("--rts", action="store_true", help="Replicated tensor sharding")
    parser.add_argument("--compile-only", action="store_true", help="Compile only")
    args, unparsed = parser.parse_known_args()
    config = get_config(args)
    return args, config
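

# Build one host-side batch of synthetic images and labels; its size is the
# batch seen by one PopTorch training step (per-device batch size x local
# replicas x gradient accumulation steps).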
def get_random_datum(config):
    result = []
    batch_size = config.DATA.BATCH_SIZE * config.IPU.NUM_LOCALREPLICA * config.IPU.GRADIENT_ACCUMULATION_STEPS
    if config.PRECISION[0] == "half":
        use_half = True
    else:
        use_half = False
    dataset = GeneratedDataset(
        shape=[3, config.DATA.IMG_SIZE[0], config.DATA.IMG_SIZE[0]], size=batch_size, half_precision=use_half
    )
    data = (dataset[i] for i in range(batch_size))
    for batches in zip(*data):
        result.append(torch.stack(batches))
    return result


class GeneratedDataset(Dataset):
    """
    Generated dataset creates a random dataset with the given shape and precision.
    The size determines the number of items in the dataset.
    """

    def __init__(self, shape, size=60000, half_precision=True):
        self.size = size
        self.half_precision = half_precision
        self.data_shape = shape

    def __len__(self):
        return self.size

    def __getitem__(self, index):
        synthetic_data = torch.rand(self.data_shape)
        synthetic_label = torch.randint(0, 2, [1], dtype=torch.long)
        if self.half_precision:
            synthetic_data = synthetic_data.half()
        return synthetic_data, synthetic_label
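

# Trigger ahead-of-time PopTorch compilation: draw one synthetic batch, apply
# mixup to it, and call compile() on the wrapped training model before the
# training loop starts.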
def compile_model(poptorch_model, config):
    datum = get_random_datum(config)
    (pre_input, pre_label) = datum
    mixup_fn = Mixup(
        mixup_alpha=config.AUG.MIXUP,
        cutmix_alpha=config.AUG.CUTMIX,
        cutmix_minmax=config.AUG.CUTMIX_MINMAX,
        prob=config.AUG.MIXUP_PROB,
        switch_prob=config.AUG.MIXUP_SWITCH_PROB,
        mode=config.AUG.MIXUP_MODE,
        label_smoothing=config.MODEL.LABEL_SMOOTHING,
        num_classes=config.MODEL.NUM_CLASSES,
    )
    pre_input, pre_label = mixup_fn(pre_input, pre_label)
    poptorch_model.compile(pre_input, pre_label)
    return pre_input, pre_label
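

# Integration test: build custom_ops.so with make, load it via ctypes, scale
# the config down to a single local replica with 16 gradient accumulation
# steps, and run a short training loop on generated data. Requires IPU
# hardware (see the pytest.mark.ipus marker).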
class TestSWIN(unittest.TestCase):
    @pytest.mark.ipus(8)
    def test_swin_model(self):
        cmd = "make"
        subprocess.check_call(cmd, shell=True, cwd=swin_root_path)
        assert os.path.exists(os.path.join(swin_root_path, "custom_ops.so")), "please compile custom op"
        ctypes.cdll.LoadLibrary(os.path.join(swin_root_path, "custom_ops.so"))
        args, config = parse_option()
        config.defrost()
        config.IPU.NUM_LOCALREPLICA = 1
        config.IPU.GRADIENT_ACCUMULATION_STEPS = 16
        config.freeze()
        seed = config.SEED
        torch.manual_seed(seed)
        np.random.seed(seed)
        opts = get_options(config)
        self.train(args, opts, config)
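
    # Build the loss, wrap the pipelined model in poptorch.trainingModel,
    # compile it against a synthetic batch, then run 100 training steps and
    # check that the mean loss stays below 10.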
    def train(self, args, opts, config):
        if config.AUG.MIXUP > 0.0:
            # smoothing is handled with mixup label transform
            criterion = SoftTargetCrossEntropy()
        elif config.MODEL.LABEL_SMOOTHING > 0.0:
            criterion = LabelSmoothingCrossEntropy(smoothing=config.MODEL.LABEL_SMOOTHING)
        else:
            criterion = torch.nn.CrossEntropyLoss()
        model = build_model(config=config, train_loss_fn=criterion)
        if args.half:
            print("use half")
            model.half()
        optimizer = build_optimizer(config, model)
        model = poptorch.trainingModel(model.train(), opts, optimizer=optimizer)
        data, targets = compile_model(model, config)
        lr_scheduler = build_scheduler(config, optimizer, 10008)
        start_epoch = 0
        if lr_scheduler is not None and start_epoch > 0:
            lr_scheduler.step(start_epoch)
        for step in range(0, 100):
            time_s = time.time()
            model.setOptimizer(lr_scheduler.optimizer)
            _, loss = model(data, targets)
            time_e = time.time()
            print("batch time:", time_e - time_s)
            assert loss.mean() <= 10.0, "loss error"