|
| 1 | +# Copyright (c) Facebook, Inc. and its affiliates. |
| 2 | +# |
| 3 | +# This source code is licensed under the MIT license found in the |
| 4 | +# LICENSE file in the root directory of this source tree. |
| 5 | + |
| 6 | +"""This script tests a few failure cases of Faiss and whether they are handled |
| 7 | +properly.""" |
| 8 | + |
| 9 | +import numpy as np |
| 10 | +import unittest |
| 11 | +import faiss |
| 12 | + |
| 13 | +from common_faiss_tests import get_dataset_2 |
| 14 | +from faiss.contrib.datasets import SyntheticDataset |
| 15 | + |
| 16 | + |
class TestValidIndexParams(unittest.TestCase):
    """Search must reject invalid parameters and still work with valid ones."""

    def _check_result_shape(self, distances, n_queries, n_neighbors):
        """Assert the distance matrix has one row per query, one column per neighbor."""
        self.assertEqual(distances.shape[0], n_queries)
        self.assertEqual(distances.shape[1], n_neighbors)

    def test_IndexIVFPQ(self):
        """IndexIVFPQ.search raises on nprobe == 0 and on a negative k."""
        d, nb, nt, nq = 32, 1000, 1500, 200
        xt, xb, xq = get_dataset_2(d, nt, nb, nq)

        # keep a Python reference to the quantizer for the lifetime of the index
        quantizer = faiss.IndexFlatL2(d)
        index = faiss.IndexIVFPQ(quantizer, d, 32, 8, 8)
        index.cp.min_points_per_centroid = 5  # quiet warning
        index.train(xt)
        index.add(xb)

        # invalid nprobe
        index.nprobe = 0
        with self.assertRaises(RuntimeError):
            index.search(xq, 10)

        # invalid k (nprobe is valid again from here on)
        index.nprobe = 4
        with self.assertRaises(AssertionError):
            index.search(xq, -10)

        # valid params
        good_k = 10
        distances, _ = index.search(xq, good_k)
        self._check_result_shape(distances, nq, good_k)

    def test_IndexFlat(self):
        """IndexFlat.search raises on a negative k and works for a positive one."""
        d, nb, nt, nq = 32, 1000, 0, 200
        _, xb, xq = get_dataset_2(d, nt, nb, nq)

        index = faiss.IndexFlat(d, faiss.METRIC_L2)
        index.add(xb)

        # invalid k
        with self.assertRaises(AssertionError):
            index.search(xq, -5)

        # valid k
        good_k = 5
        distances, _ = index.search(xq, good_k)
        self._check_result_shape(distances, nq, good_k)
| 72 | + |
| 73 | + |
class TestReconsException(unittest.TestCase):
    """Checks reconstruct() on IVF indexes once a direct map has been built."""

    def test_recons_exception(self):
        """reconstruct() works for a stored id and raises for an id never added."""
        d = 64  # dimension
        nb = 1000
        rs = np.random.RandomState(1234)
        xb = rs.rand(nb, d).astype('float32')
        nlist = 10
        quantizer = faiss.IndexFlatL2(d)  # the other index
        index = faiss.IndexIVFFlat(quantizer, d, nlist)
        index.train(xb)
        index.add(xb)
        index.make_direct_map()

        # id 9 was added above: this call must not raise
        index.reconstruct(9)

        # only nb vectors were added, so id 100001 does not exist
        with self.assertRaises(RuntimeError):
            index.reconstruct(100001)

    def test_reconstuct_after_add(self):
        """Vectors added after make_direct_map() must still be reconstructible."""
        d = 10
        n = 100  # vectors per batch
        index = faiss.index_factory(d, 'IVF5,SQfp16')
        index.train(faiss.randn((n, d), 123))
        index.add(faiss.randn((n, d), 345))
        index.make_direct_map()
        index.add(faiss.randn((n, d), 678))

        # both batches must be accounted for (checked instead of printed)
        self.assertEqual(index.ntotal, 2 * n)

        # should not raise an exception
        index.reconstruct(5)    # id from the batch added before the direct map
        index.reconstruct(150)  # id from the batch added after the direct map
| 107 | + |
| 108 | + |
| 109 | +class TestNaN(unittest.TestCase): |
| 110 | + """ NaN values handling is transparent: they don't produce results |
| 111 | + but should not crash. The tests below cover a few common index types. |
| 112 | + """ |
| 113 | + |
| 114 | + def do_test_train(self, factory_string): |
| 115 | + """ NaN and Inf should raise an exception at train time """ |
| 116 | + ds = SyntheticDataset(32, 200, 20, 10) |
| 117 | + index = faiss.index_factory(ds.d, factory_string) |
| 118 | + # try to train with NaNs |
| 119 | + xt = ds.get_train().copy() |
| 120 | + xt[:, ::4] = np.nan |
| 121 | + self.assertRaises(RuntimeError, index.train, xt) |
| 122 | + |
| 123 | + def test_train_IVFSQ(self): |
| 124 | + self.do_test_train("IVF10,SQ8") |
| 125 | + |
| 126 | + def test_train_IVFPQ(self): |
| 127 | + self.do_test_train("IVF10,PQ4np") |
| 128 | + |
| 129 | + def test_train_SQ(self): |
| 130 | + self.do_test_train("SQ8") |
| 131 | + |
| 132 | + def do_test_add(self, factory_string): |
| 133 | + """ stored NaNs should not be returned at search time """ |
| 134 | + ds = SyntheticDataset(32, 200, 20, 10) |
| 135 | + index = faiss.index_factory(ds.d, factory_string) |
| 136 | + if not index.is_trained: |
| 137 | + index.train(ds.get_train()) |
| 138 | + xb = ds.get_database() |
| 139 | + xb[12, 3] = np.nan |
| 140 | + index.add(xb) |
| 141 | + D, I = index.search(ds.get_queries(), 20) |
| 142 | + self.assertTrue(np.where(I == 12)[0].size == 0) |
| 143 | + |
| 144 | + def test_add_Flat(self): |
| 145 | + self.do_test_add("Flat") |
| 146 | + |
| 147 | + def test_add_HNSW(self): |
| 148 | + self.do_test_add("HNSW32,Flat") |
| 149 | + |
| 150 | + def xx_test_add_SQ8(self): |
| 151 | + # this is expected to fail because: |
| 152 | + # in ASAN mode, the float NaN -> int conversion crashes |
| 153 | + # in opt mode it works but there is no way to encode the NaN, |
| 154 | + # so the value cannot be ignored. |
| 155 | + self.do_test_add("SQ8") |
| 156 | + |
| 157 | + def test_add_IVFFlat(self): |
| 158 | + self.do_test_add("IVF10,Flat") |
| 159 | + |
| 160 | + def do_test_search(self, factory_string): |
| 161 | + """ NaN query vectors should return -1 """ |
| 162 | + ds = SyntheticDataset(32, 200, 20, 10) |
| 163 | + index = faiss.index_factory(ds.d, factory_string) |
| 164 | + if not index.is_trained: |
| 165 | + index.train(ds.get_train()) |
| 166 | + index.add(ds.get_database()) |
| 167 | + xq = ds.get_queries() |
| 168 | + xq[7, 3] = np.nan |
| 169 | + D, I = index.search(ds.get_queries(), 20) |
| 170 | + self.assertTrue(np.all(I[7] == -1)) |
| 171 | + |
| 172 | + def test_search_Flat(self): |
| 173 | + self.do_test_search("Flat") |
| 174 | + |
| 175 | + def test_search_HNSW(self): |
| 176 | + self.do_test_search("HNSW32,Flat") |
| 177 | + |
| 178 | + def test_search_IVFFlat(self): |
| 179 | + self.do_test_search("IVF10,Flat") |
| 180 | + |
| 181 | + def test_search_SQ(self): |
| 182 | + self.do_test_search("SQ8") |
0 commit comments