Skip to content

Commit cbff63e

Browse files
algoriddlefacebook-github-bot
authored andcommitted
addn_query_subset_with_ids float index bug (facebookresearch#2834)
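Summary: Pull Request resolved: facebookresearch#2834 In addn_query_subset_with_ids the heap index read from `subset` was stored in a variable of the value type `T` (float) instead of the index type `TI`. A float cannot represent every integer above 2^24 exactly, so storing the index in a float results in buffer overflow and corrupts search results. Reviewed By: mdouze Differential Revision: D45388883 fbshipit-source-id: bee696495b323a13350550d1928d25202539b1f0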
1 parent 155065c commit cbff63e

File tree

3 files changed

+55
-1
lines changed

3 files changed

+55
-1
lines changed

faiss/utils/Heap.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ void HeapArray<C>::addn_query_subset_with_ids(
9393
}
9494
#pragma omp parallel for if (nsubset * nj > 100000)
9595
for (int64_t si = 0; si < nsubset; si++) {
96-
T i = subset[si];
96+
TI i = subset[si];
9797
T* __restrict simi = get_val(i);
9898
TI* __restrict idxi = get_ids(i);
9999
const T* ip_line = vin + si * nj;

tests/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ set(FAISS_TEST_SRC
2626
test_approx_topk.cpp
2727
test_RCQ_cropping.cpp
2828
test_distances_simd.cpp
29+
test_heap.cpp
2930
)
3031

3132
add_executable(faiss_test ${FAISS_TEST_SRC})

tests/test_heap.cpp

+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/**
2+
* Copyright (c) Facebook, Inc. and its affiliates.
3+
*
4+
* This source code is licensed under the MIT license found in the
5+
* LICENSE file in the root directory of this source tree.
6+
*/
7+
#include <faiss/utils/Heap.h>
8+
#include <gtest/gtest.h>
9+
#include <algorithm>
10+
#include <numeric>
11+
12+
using namespace faiss;
13+
14+
TEST(Heap, addn_with_ids) {
    // Sanity check for HeapArray::addn_with_ids: after pushing one
    // (distance 0, label 1) candidate towards each heap, every slot of
    // heap_labels must hold label 1 instead of its initial value -1.
    size_t n = 1000;
    size_t k = 1;

    // Backing storage handed to the heap array; labels start at -1 so that
    // an untouched slot is distinguishable from an inserted label.
    std::vector<int64_t> heap_labels(n, -1);
    std::vector<float> heap_distances(n, 0);
    float_minheap_array_t heaps = {
            n, k, heap_labels.data(), heap_distances.data()};
    heaps.heapify();

    // Candidates to insert: n distances of 0 and n labels of 1.
    std::vector<int64_t> labels(n, 1);
    std::vector<float> distances(n, 0.0f);

    // NOTE(review): the leading 1 is presumably the number of candidates per
    // heap and the trailing 1 the id stride -- confirm against Heap.h.
    heaps.addn_with_ids(1, distances.data(), labels.data(), 1);
    heaps.reorder();

    EXPECT_TRUE(
            std::all_of(heap_labels.begin(), heap_labels.end(), [](int64_t i) {
                return i == 1;
            }));
}
33+
34+
TEST(Heap, addn_query_subset_with_ids) {
    // Regression test for heap indices that used to be held in a float:
    // with more than 2^24 heaps, not every index is exactly representable
    // as a float, so the heap count is chosen above that limit.
    const size_t num_heaps = 20000000; // more than 2^24
    const size_t heap_capacity = 1;

    // Storage owned by the test and wrapped by the heap array. Labels start
    // at -1 so that a heap which never received its candidate is detectable.
    std::vector<float> heap_distances(num_heaps, 0);
    std::vector<int64_t> heap_labels(num_heaps, -1);
    float_minheap_array_t heaps = {
            num_heaps,
            heap_capacity,
            heap_labels.data(),
            heap_distances.data()};
    heaps.heapify();

    // One candidate (distance 0, label 1) per subset entry.
    std::vector<float> distances(num_heaps, 0.0f);
    std::vector<int64_t> labels(num_heaps, 1);

    // Identity subset: entry si targets heap si, so every heap is addressed
    // exactly once, including those with an index above 2^24.
    std::vector<int64_t> subset(num_heaps);
    int64_t next_index = 0;
    for (int64_t& target : subset) {
        target = next_index++;
    }

    heaps.addn_query_subset_with_ids(
            num_heaps,
            subset.data(),
            1,
            distances.data(),
            labels.data(),
            1);
    heaps.reorder();

    // Every heap must now hold label 1.
    EXPECT_TRUE(
            std::all_of(heap_labels.begin(), heap_labels.end(), [](int64_t i) {
                return i == 1;
            }));
}

0 commit comments

Comments
 (0)