Skip to content

Commit 3704bbe

Browse files
mdouze authored and facebook-github-bot committed
Add GIST1M to datasets
Summary: GIST1M is on the fair cluster but was not added to datasets.py. Reviewed By: alexanderguzhva Differential Revision: D45276664 fbshipit-source-id: 8db41d61b78983f5d01dedca1790618f80f6bc78
1 parent 1cb1e54 commit 3704bbe

File tree

1 file changed

+30
-0
lines changed

1 file changed

+30
-0
lines changed

contrib/datasets.py

+30
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,33 @@ def get_groundtruth(self, k=None):
311311
gt = gt[:, :k]
312312
return gt
313313

314+
class DatasetGIST1M(Dataset):
    """
    GIST1M benchmark dataset, read from ``dataset_basedir + 'gist1M/'``.

    The original dataset is available at: http://corpus-texmex.irisa.fr/
    (ANN_GIST1M)
    """

    def __init__(self):
        Dataset.__init__(self)
        # Sizes follow the published ANN_GIST1M description: 960-dim
        # vectors, 1M database vectors and 1000 queries. nt is the default
        # cap applied by get_train(), not the size of the learn file.
        self.d, self.nt, self.nb, self.nq = 960, 100000, 1000000, 1000
        self.basedir = dataset_basedir + 'gist1M/'

    def get_queries(self):
        """Return the query vectors read from gist_query.fvecs."""
        return fvecs_read(self.basedir + "gist_query.fvecs")

    def get_train(self, maxtrain=None):
        """Return the first ``maxtrain`` vectors of gist_learn.fvecs.

        When ``maxtrain`` is None, ``self.nt`` vectors are returned.
        """
        maxtrain = maxtrain if maxtrain is not None else self.nt
        return fvecs_read(self.basedir + "gist_learn.fvecs")[:maxtrain]

    def get_database(self):
        """Return the database vectors read from gist_base.fvecs."""
        return fvecs_read(self.basedir + "gist_base.fvecs")

    def get_groundtruth(self, k=None):
        """Return the ground truth read from gist_groundtruth.ivecs.

        When ``k`` is given, only the first ``k`` columns are kept;
        ``k`` must not exceed 100.
        """
        gt = ivecs_read(self.basedir + "gist_groundtruth.ivecs")
        if k is not None:
            # presumably the file stores 100 neighbors per query -- a larger
            # k could not be served by slicing
            assert k <= 100
            gt = gt[:, :k]
        return gt
314341

315342

316343
def dataset_from_name(dataset='deep1M', download=False):
@@ -321,6 +348,9 @@ def dataset_from_name(dataset='deep1M', download=False):
321348
if dataset == 'sift1M':
322349
return DatasetSIFT1M()
323350

351+
elif dataset == 'gist1M':
352+
return DatasetGIST1M()
353+
324354
elif dataset.startswith('bigann'):
325355
dbsize = 1000 if dataset == "bigann1B" else int(dataset[6:-1])
326356
return DatasetBigANN(nb_M=dbsize)

0 commit comments

Comments
 (0)