"""
###########################################################################
Model wrapper for DINO ViTs to extract both attention and features.
Utilizes code from the original DINO repository:
(https://github.com/facebookresearch/dino) Original copyright notice below.
Written by: Matthew Walmer
###########################################################################
"""
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
import torch
from torchvision import transforms as pth_transforms
from meta_utils.arch_conv import letter2arch
from meta_utils.feature_extractor import FeatureExtractor
from meta_utils.block_mapper import block_mapper
from meta_utils.preproc import standard_transform
sys.path.append('dino/')
import vision_transformer as vits


class DINO_Wrapper:
    def __init__(self, arch, patch, imsize, extract_mode='none', blk_sel='all'):
        assert extract_mode in ['none', 'attn', 'feat']
        if extract_mode == 'none':
            print('WARNING: wrapper running in NONE mode, no tensors will be extracted')
            print('only use this mode if extracting features separately')
        self.arch = arch
        self.patch = patch
        self.imsize = imsize
        self.extract_mode = extract_mode
        self.device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
        # create model identifier and test configuration
        self.mod_id = 'DINO-ViT-%s-%i-%i'%(arch, patch, imsize)
        if arch not in ['S','B'] or patch not in [8, 16]:
            print('ERROR: Invalid DINO config ' + self.mod_id)
            exit(-1)
        # transform
        self.transform = standard_transform('dino', imsize)
        # handle block selection
        self.blk_sel = blk_sel
        self.blk_idxs = block_mapper(arch, blk_sel)

    def load(self):
        # load model
        dino_arch = 'vit_%s'%letter2arch(self.arch)
        self.model = vits.__dict__[dino_arch](patch_size=self.patch, num_classes=0)
        for p in self.model.parameters():
            p.requires_grad = False
        self.model.eval()
        self.model.to(self.device)
        url = None
        if dino_arch == "vit_small" and self.patch == 16:
            url = "dino_deitsmall16_pretrain/dino_deitsmall16_pretrain.pth"
        elif dino_arch == "vit_small" and self.patch == 8:
            url = "dino_deitsmall8_300ep_pretrain/dino_deitsmall8_300ep_pretrain.pth" # model used for visualizations in our paper
        elif dino_arch == "vit_base" and self.patch == 16:
            url = "dino_vitbase16_pretrain/dino_vitbase16_pretrain.pth"
        elif dino_arch == "vit_base" and self.patch == 8:
            url = "dino_vitbase8_pretrain/dino_vitbase8_pretrain.pth"
        if url is not None:
            print("Loading reference pretrained DINO weights: " + url)
            state_dict = torch.hub.load_state_dict_from_url(url="https://dl.fbaipublicfiles.com/dino/" + url)
            self.model.load_state_dict(state_dict, strict=True)
        else:
            print("ERROR: There are no reference pretrained weights available for this model")
            exit(-1)
        # prepare hooks - depending on extract_mode
        layers = []
        for idx in self.blk_idxs:
            if self.extract_mode == 'none':
                continue
            if self.extract_mode == 'attn':
                # hook the attention dropout layer, which receives the post-softmax attention maps
                layers.append(self.model.blocks[idx].attn.attn_drop)
            if self.extract_mode == 'feat':
                # hook the full transformer block to capture its output tokens
                layers.append(self.model.blocks[idx])
        self.extractor = FeatureExtractor(self.model, layers)

    def get_activations(self, x):
        # run a forward pass and return the tensors captured by the hooked layers
        acts = self.extractor(x.to(self.device))
        return acts
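

###########################################################################
# Minimal usage sketch (illustrative, not part of the original interface).
# It shows the intended call order: construct, load, preprocess, extract.
# Assumptions: FeatureExtractor returns one captured tensor per hooked block,
# aligned with blk_idxs, and standard_transform yields a torchvision-style
# transform; the image path below is a hypothetical placeholder.
###########################################################################
if __name__ == '__main__':
    from PIL import Image
    wrapper = DINO_Wrapper('B', 16, 224, extract_mode='attn', blk_sel='all')
    wrapper.load()
    img = Image.open('example.jpg').convert('RGB')  # hypothetical input image
    x = wrapper.transform(img).unsqueeze(0)  # add a batch dimension
    acts = wrapper.get_activations(x)
    for idx, act in zip(wrapper.blk_idxs, acts):
        print('block %i: %s' % (idx, str(tuple(act.shape))))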