Skip to content

Commit e49ff16

Browse files
authored
Merge pull request #2 from yuhaijun999/main-dingodb
Optimize IndexIDMap2::construct_rev_map function.
2 parents bf46c0f + f65120c commit e49ff16

File tree

2 files changed

+93
-5
lines changed

2 files changed

+93
-5
lines changed

faiss/IndexIDMap.cpp

+88-5
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
#include <cstdint>
1414
#include <cstdio>
1515
#include <limits>
16+
#include <vector>
17+
#include "faiss/MetricType.h"
1618

1719
#include <faiss/impl/AuxIndexStructures.h>
1820
#include <faiss/impl/FaissAssert.h>
@@ -253,16 +255,97 @@ void IndexIDMap2Template<IndexT>::merge_from(IndexT& otherIndex, idx_t add_id) {
253255

254256
// Brings rev_map (id -> position in id_map) up to date.
//
// Two paths:
//  - Full rebuild: taken when either entry of min_max_id_map_index still holds
//    the sentinel std::numeric_limits<idx_t>::min(), or when
//    delete_id_map_value is empty. rev_map is cleared and refilled from
//    id_map[0 .. ntotal).
//  - Incremental update: taken otherwise, using the removed ids and the
//    first/last removed positions that remove_ids() recorded.
//
// NOTE(review): the incremental path looks like it assumes that rev_map
// matched id_map before the removal and that ids in id_map are unique
// (a duplicated id would be shifted twice or erased while still present,
// tripping the size assertion below) -- confirm against callers.
template <typename IndexT>
255257
void IndexIDMap2Template<IndexT>::construct_rev_map() {
256-
rev_map.clear();
257-
for (size_t i = 0; i < this->ntotal; i++) {
258-
rev_map[this->id_map[i]] = i;
258+
if (std::numeric_limits<idx_t>::min() == min_max_id_map_index[0] ||
259+
std::numeric_limits<idx_t>::min() == min_max_id_map_index[1] ||
260+
delete_id_map_value.empty()) {
261+
rev_map.clear();
262+
for (size_t i = 0; i < this->ntotal; i++) {
263+
rev_map[this->id_map[i]] = i;
264+
}
265+
} else {
266+
// Accelerated (incremental) rebuild:
267+
// 1. Take the boundaries of the deleted elements, i.e. the minimum and
268+
// maximum deleted positions stored in min_max_id_map_index.
269+
// 2. Entries before the minimum position are left unchanged.
270+
// 3. Re-assign the entries from the minimum position up to the maximum
271+
// position minus the number of deleted elements.
272+
// 4. For every entry after that range, subtract the number of deleted
273+
// elements from the stored position.
274+
// 5. Finally, erase the deleted ids from rev_map.
275+
276+
// Section 1: entries before min_max_id_map_index[0].
277+
// Nothing is done for them; the loop below is intentionally disabled.
278+
// for (size_t i = 0; min_max_id_map_index[0] && i < this->ntotal; i++)
279+
// {}
280+
281+
// Section 2: positions in this range are re-assigned from id_map.
282+
for (int64_t i = min_max_id_map_index[0];
283+
i <= (min_max_id_map_index[1] -
284+
static_cast<idx_t>(delete_id_map_value.size())) &&
285+
i < this->ntotal;
286+
i++) {
287+
rev_map[this->id_map[i]] = i;
288+
}
289+
290+
// Section 3: shift each stored position down by the number of deleted ids.
291+
for (int64_t i = (min_max_id_map_index[1] -
292+
static_cast<idx_t>(delete_id_map_value.size())) +
293+
1;
294+
i < this->ntotal;
295+
i++) {
296+
rev_map[this->id_map[i]] -= delete_id_map_value.size();
297+
}
298+
299+
// Erase every id that was recorded as deleted.
300+
for (idx_t value : delete_id_map_value) {
301+
rev_map.erase(value);
302+
}
303+
304+
// After the update rev_map must hold exactly one entry per remaining id.
FAISS_ASSERT(rev_map.size() == this->ntotal);
305+
306+
// Reset the tracking state to the sentinel so that the next call, unless
// remove_ids() records new deletions first, takes the full-rebuild branch.
delete_id_map_value.clear();
307+
min_max_id_map_index[0] = min_max_id_map_index[1] =
308+
std::numeric_limits<idx_t>::min();
259309
}
260310
}
261311

262312
// Removes every entry whose id is selected by `sel` and updates rev_map.
//
// Steps:
//  1. Call remove_ids() on the wrapped index with an IDSelectorTranslated
//     built from id_map and `sel`.
//  2. If nothing was removed, return 0 immediately (rev_map is not touched).
//  3. Compact id_map in place, recording each removed id in
//     delete_id_map_value and the first/last removed positions in
//     min_max_id_map_index.
//  4. Call construct_rev_map(), which consumes the recorded state.
//
// @param sel  selector deciding which ids are removed
// @return     the count reported by the wrapped index's remove_ids()
template <typename IndexT>
263313
size_t IndexIDMap2Template<IndexT>::remove_ids(const IDSelector& sel) {
264-
// This is quite inefficient
265-
size_t nremove = IndexIDMapTemplate<IndexT>::remove_ids(sel);
314+
// Remove from the wrapped index first.
315+
IDSelectorTranslated sel2(IndexIDMapTemplate<IndexT>::id_map, &sel);
316+
size_t nremove = IndexIDMapTemplate<IndexT>::index->remove_ids(sel2);
317+
318+
if (0 == nremove) {
319+
return nremove;
320+
}
321+
322+
// Start from a clean tracking state: no removed ids recorded and both
// bounds set to the "unset" sentinel.
delete_id_map_value.clear();
323+
delete_id_map_value.reserve(nremove);
324+
min_max_id_map_index[0] = min_max_id_map_index[1] =
325+
std::numeric_limits<idx_t>::min();
326+
327+
// j is the write cursor for kept ids; i scans the current id_map.
int64_t j = 0;
328+
for (idx_t i = 0; i < this->ntotal; i++) {
329+
if (sel.is_member(IndexIDMapTemplate<IndexT>::id_map[i])) {
330+
// Selected for removal: the id is not copied forward.
331+
// Record the removed id for use by construct_rev_map().
332+
delete_id_map_value.push_back(
333+
IndexIDMapTemplate<IndexT>::id_map[i]);
334+
// The first removed position is stored only once ...
if (std::numeric_limits<idx_t>::min() == min_max_id_map_index[0]) {
335+
min_max_id_map_index[0] = i;
336+
}
337+
// ... while the last removed position is overwritten on every hit.
min_max_id_map_index[1] = i;
338+
} else {
339+
IndexIDMapTemplate<IndexT>::id_map[j] =
340+
IndexIDMapTemplate<IndexT>::id_map[i];
341+
j++;
342+
}
343+
}
344+
// The number of kept ids must equal the wrapped index's ntotal.
FAISS_ASSERT(j == IndexIDMapTemplate<IndexT>::index->ntotal);
345+
this->ntotal = j;
346+
IndexIDMapTemplate<IndexT>::id_map.resize(this->ntotal);
347+
// The number of ids recorded here must equal the count reported above.
FAISS_ASSERT(nremove == delete_id_map_value.size());
348+
266349
construct_rev_map();
267350
return nremove;
268351
}

faiss/IndexIDMap.h

+5
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <faiss/IndexBinary.h>
1212
#include <faiss/impl/IDSelector.h>
1313

14+
#include <limits>
1415
#include <unordered_map>
1516
#include <vector>
1617

@@ -78,6 +79,10 @@ struct IndexIDMap2Template : IndexIDMapTemplate<IndexT> {
7879
using distance_t = typename IndexT::distance_t;
7980

8081
std::unordered_map<idx_t, idx_t> rev_map;
82+
std::vector<idx_t> delete_id_map_value;
83+
idx_t min_max_id_map_index[2]{
84+
std::numeric_limits<idx_t>::min(),
85+
std::numeric_limits<idx_t>::min()};
8186

8287
explicit IndexIDMap2Template(IndexT* index);
8388

0 commit comments

Comments
 (0)