-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathinference_clustering.py
89 lines (75 loc) · 2.16 KB
/
inference_clustering.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import os
import torch
import numpy as np
from time import time
from tqdm import tqdm
from scipy import stats
from sklearn import preprocessing
from sklearn.decomposition import PCA
import sys
sys.path.append('libs')
from libKMCUDA import kmeans_cuda
# ---- Experiment configuration ----
# K: number of clusters requested from k-means.
K: int = 90
# N: number of feature rows taken from each feature file.
N: int = 128
# P: when True, the features are PCA-decomposed before clustering.
P: bool = False
# L2: when True, every feature row is L2-normalised.
L2: bool = True
# R: which consecutive group of N rows is read from each file
# (1-based "repeat" index; the original note gives the range [1, 10]).
R: int = 1
# Load one feature matrix per file and build a parallel array holding the
# integer label of every feature row.
feature_folder = 'results/VISION-1500-Ours-ResNet'
#feature_folder = 'results/VISION-1500-Ours-DenseNet'
# os.listdir() returns names in arbitrary order; sorting makes the file
# order (and therefore the row order of X) reproducible.
path = sorted(os.listdir(feature_folder))
n_samples = len(path)
if not path:
    # Fail with a clear message instead of the opaque error that
    # np.concatenate raises on an empty list.
    raise ValueError('no feature files found in %r' % feature_folder)
X = []
X_lb = []
for name in tqdm(path):
    feats = np.load(os.path.join(feature_folder, name))
    feats = feats[(R - 1) * N:R * N, :]  # the R-th group of N rows
    if L2:
        feats = preprocessing.normalize(feats, norm='l2')
    # The label is the integer in front of the first underscore of the file
    # name.  partition() leaves a name without an underscore intact, so
    # int() rejects it; find() + slicing would instead silently drop the
    # last character and could parse a wrong label.
    label = int(name.partition('_')[0])
    X.append(feats)
    X_lb.append([label] * feats.shape[0])  # one label per feature row
X = np.concatenate(X, axis=0)
X_lb = np.concatenate(X_lb, axis=0)
# Optional dimensionality reduction: when the P flag is set, X is replaced
# by its projection onto 256 principal components.
if P:
    pca = PCA(n_components=256, svd_solver='auto')
    X = pca.fit_transform(X)
# Show the shape of the matrix that is handed to k-means.
print(X.shape)
# Cluster all feature rows into K groups with libKMCUDA's kmeans_cuda.
# It returns the centroids and k_lb, the cluster index assigned to each
# row of X.
centroids, k_lb = kmeans_cuda(
    X,
    K,
    tolerance=0.01,
    init='k-means++',
    metric='L2',
    verbosity=1,
    seed=0,
)
# Keep the centroids as a torch tensor for the distance computations below.
centroids = torch.Tensor(centroids)
# Build the cluster -> label lookup: every cluster gets the label that occurs
# most often among the feature rows k-means assigned to it.  Clusters that
# received no rows get the sentinel -1 and are recorded in missed_i so the
# prediction step can exclude them.
mapping = np.zeros(K)
missed_i = []
for cluster in range(K):
    members = X_lb[k_lb == cluster]
    if members.shape[0] == 0:
        mapping[cluster] = -1
        missed_i.append(cluster)
        continue
    # Majority label via np.unique instead of stats.mode(...)[0][0]:
    # SciPy >= 1.11 returns a scalar mode for 1-D input, so the double index
    # raises IndexError there.  np.unique returns its values sorted, so
    # argmax picks the smallest label on a tie, exactly as stats.mode does.
    labels, counts = np.unique(members, return_counts=True)
    mapping[cluster] = labels[counts.argmax()]
# Evaluate: each file is assigned the cluster that most of its feature rows
# are closest to, that cluster is translated to a label through `mapping`,
# and the file counts as correct when the label matches the integer prefix
# of its file name.
acc_count = 0
for i, name in enumerate(tqdm(path)):
    if P:
        # PCA-projected rows only exist in X; file i owns rows
        # [i*N, (i+1)*N) because X was concatenated in `path` order.
        x = X[i * N:(i + 1) * N]
    else:
        x = np.load(os.path.join(feature_folder, name))
        x = x[(R - 1) * N:R * N, :]
        if L2:
            x = preprocessing.normalize(x, norm='l2')
    # partition() keeps a name without an underscore intact so int() rejects
    # it, instead of find() + slicing silently dropping the last character.
    x_lb = int(name.partition('_')[0])
    with torch.no_grad():
        q = torch.Tensor(x)
        # Euclidean distance from every row to every centroid in one call,
        # shape (rows, K).  The non-matmul mode is requested so the values
        # are computed directly from the differences rather than through the
        # less precise matrix-multiplication shortcut.
        dist = torch.cdist(q, centroids, p=2,
                           compute_mode='donot_use_mm_for_euclid_dist')
        if missed_i:
            # Empty clusters have no label, so they must never be selected.
            dist[:, missed_i] = float('inf')
        preds = dist.argmin(dim=1).numpy()
    # Majority vote over the rows.  np.unique replaces stats.mode(...)[0][0],
    # which raises IndexError on SciPy >= 1.11 (scalar result for 1-D input);
    # sorted values + argmax keep the smallest-index tie-break.
    cluster_ids, votes = np.unique(preds, return_counts=True)
    pred = cluster_ids[votes.argmax()]
    if mapping[pred] == x_lb:
        acc_count += 1
print('ACC: %d/%d = %.2f%%' % (acc_count, n_samples, acc_count / n_samples * 100))