Skip to content

Commit edcf743

Browse files
cjnolet authored and facebook-github-bot committed
Integrate IVF-Flat from RAFT (facebookresearch#2521)
Summary: This is a design proposal that demonstrates an approach to enabling optional support for [RAFT](https://github.com/rapidsai/raft) versions of IVF PQ and IVF Flat (and brute force w/ fused k-selection when k <= 64). There are still a few open issues and design discussions needed for the new RAFT index types to support the full range of features of FAISS' current GPU index types.

Checklist for the integration todos:
- [x] Rebase on current `main` branch
- [x] The raft handle has been plugged directly into the StandardGpuResources
- [x] `FlatIndex` passing Googletests
- [x] Use `CodePacker` to support `copyFrom()` and `copyTo()`
- [x] `IVF-Flat` passing Googletests
- [ ] Raise appropriate exceptions for operations which are not yet supported by RAFT

Additional features we've discussed:
- [x] Separate IVF lists into individual memory chunks
- [ ] Saving/loading

To build FAISS w/ optional RAFT support:
```
mkdir build
cd build
cmake ../ -DFAISS_ENABLE_RAFT=ON -DFAISS_ENABLE_GPU=ON
make -j
```

For development/testing, we've also supplied a bash script to make things easier: `build.sh`

Below is a benchmark comparing the training of IVF Flat indices for RAFT and FAISS: ![image](https://user-images.githubusercontent.com/1242464/194944737-8b808f11-e28e-4556-82d1-1ea4b0707283.png)

The benchmark was produced using Googlebench in [this](https://github.com/tfeher/raft/tree/raft_faiss_bench) RAFT fork. We're going to provide benchmarks for the queries as well. There are still a couple of bottlenecks to be removed in the IVF-Flat training implementation and we'll update the current benchmark when ready.

Pull Request resolved: facebookresearch#2521

Test Plan: `buck test mode/dev-nosan //faiss/gpu/test:test_gpu_index_ivfflat`

Reviewed By: algoriddle

Differential Revision: D49118319

Pulled By: mdouze

fbshipit-source-id: 5916108bc27154acf7c92021ba579a6ca85d730b
1 parent 458633c commit edcf743

14 files changed

+1231
-91
lines changed

build.sh

+58
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#!/bin/bash
2+
3+
# NOTE: This file is temporary for the proof-of-concept branch and will be removed before this PR is merged
4+
5+
BUILD_TYPE=Release
6+
BUILD_DIR=build/
7+
8+
RAFT_REPO_REL=""
9+
EXTRA_CMAKE_ARGS=""
10+
set -e
11+
12+
if [[ ${RAFT_REPO_REL} != "" ]]; then
13+
RAFT_REPO_PATH="`readlink -f \"${RAFT_REPO_REL}\"`"
14+
EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DCPM_raft_SOURCE=${RAFT_REPO_PATH}"
15+
fi
16+
17+
if [ "$1" == "clean" ]; then
18+
rm -rf build
19+
rm -rf .cache
20+
exit 0
21+
fi
22+
23+
if [ "$1" == "test" ]; then
24+
make -C build -j test
25+
exit 0
26+
fi
27+
28+
if [ "$1" == "test-raft" ]; then
29+
./build/faiss/gpu/test/TestRaftIndexIVFFlat
30+
exit 0
31+
fi
32+
33+
mkdir -p $BUILD_DIR
34+
cd $BUILD_DIR
35+
36+
cmake \
37+
-DFAISS_ENABLE_GPU=ON \
38+
-DFAISS_ENABLE_RAFT=ON \
39+
-DFAISS_ENABLE_PYTHON=OFF \
40+
-DBUILD_TESTING=ON \
41+
-DBUILD_SHARED_LIBS=OFF \
42+
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
43+
-DFAISS_OPT_LEVEL=avx2 \
44+
-DRAFT_NVTX=OFF \
45+
-DCMAKE_CUDA_ARCHITECTURES="NATIVE" \
46+
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
47+
-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
48+
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
49+
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
50+
${EXTRA_CMAKE_ARGS} \
51+
../
52+
53+
54+
# make -C build -j12 faiss
55+
cmake --build . -j12
56+
# make -C build -j12 swigfaiss
57+
# (cd build/faiss/python && python setup.py install)
58+

cmake/thirdparty/fetch_rapids.cmake

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# or implied. See the License for the specific language governing permissions and limitations under
1616
# the License.
1717
# =============================================================================
18-
set(RAPIDS_VERSION "23.06")
18+
set(RAPIDS_VERSION "23.08")
1919

2020
if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/FAISS_RAPIDS.cmake)
2121
file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION}/RAPIDS.cmake

faiss/gpu/CMakeLists.txt

+3-1
Original file line numberDiff line numberDiff line change
@@ -238,9 +238,11 @@ generate_ivf_interleaved_code()
238238

239239
if(FAISS_ENABLE_RAFT)
240240
list(APPEND FAISS_GPU_HEADERS
241+
impl/RaftIVFFlat.cuh
241242
impl/RaftFlatIndex.cuh)
242243
list(APPEND FAISS_GPU_SRC
243-
impl/RaftFlatIndex.cu)
244+
impl/RaftFlatIndex.cu
245+
impl/RaftIVFFlat.cu)
244246

245247
target_compile_definitions(faiss PUBLIC USE_NVIDIA_RAFT=1)
246248
target_compile_definitions(faiss_avx2 PUBLIC USE_NVIDIA_RAFT=1)

faiss/gpu/GpuIndexIVF.cu

+43-6
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@
1616
#include <faiss/gpu/impl/IVFBase.cuh>
1717
#include <faiss/gpu/utils/CopyUtils.cuh>
1818

19+
#if defined USE_NVIDIA_RAFT
20+
#include <raft/core/handle.hpp>
21+
#include <raft/neighbors/ivf_flat.cuh>
22+
#endif
23+
1924
namespace faiss {
2025
namespace gpu {
2126

@@ -444,14 +449,46 @@ void GpuIndexIVF::trainQuantizer_(idx_t n, const float* x) {
444449
printf("Training IVF quantizer on %ld vectors in %dD\n", n, d);
445450
}
446451

447-
// leverage the CPU-side k-means code, which works for the GPU
448-
// flat index as well
449452
quantizer->reset();
450-
Clustering clus(this->d, nlist, this->cp);
451-
clus.verbose = verbose;
452-
clus.train(n, x, *quantizer);
453-
quantizer->is_trained = true;
454453

454+
#if defined USE_NVIDIA_RAFT
455+
456+
if (config_.use_raft) {
457+
const raft::device_resources& raft_handle =
458+
resources_->getRaftHandleCurrentDevice();
459+
460+
raft::neighbors::ivf_flat::index_params raft_idx_params;
461+
raft_idx_params.n_lists = nlist;
462+
raft_idx_params.metric = metric_type == faiss::METRIC_L2
463+
? raft::distance::DistanceType::L2Expanded
464+
: raft::distance::DistanceType::InnerProduct;
465+
raft_idx_params.add_data_on_build = false;
466+
raft_idx_params.kmeans_trainset_fraction = 1.0;
467+
raft_idx_params.kmeans_n_iters = cp.niter;
468+
raft_idx_params.adaptive_centers = !cp.frozen_centroids;
469+
470+
auto raft_index = raft::neighbors::ivf_flat::build(
471+
raft_handle, raft_idx_params, x, n, (idx_t)d);
472+
473+
raft_handle.sync_stream();
474+
475+
quantizer->train(nlist, raft_index.centers().data_handle());
476+
quantizer->add(nlist, raft_index.centers().data_handle());
477+
} else
478+
#else
479+
if (config_.use_raft) {
480+
FAISS_THROW_MSG(
481+
"RAFT has not been compiled into the current version so it cannot be used.");
482+
} else
483+
#endif
484+
{
485+
// leverage the CPU-side k-means code, which works for the GPU
486+
// flat index as well
487+
Clustering clus(this->d, nlist, this->cp);
488+
clus.verbose = verbose;
489+
clus.train(n, x, *quantizer);
490+
}
491+
quantizer->is_trained = true;
455492
FAISS_ASSERT(quantizer->ntotal == nlist);
456493
}
457494

faiss/gpu/GpuIndexIVF.h

+7-6
Original file line numberDiff line numberDiff line change
@@ -73,23 +73,24 @@ class GpuIndexIVF : public GpuIndex, public IndexIVFInterface {
7373
virtual void updateQuantizer() = 0;
7474

7575
/// Returns the number of inverted lists we're managing
76-
idx_t getNumLists() const;
76+
virtual idx_t getNumLists() const;
7777

7878
/// Returns the number of vectors present in a particular inverted list
79-
idx_t getListLength(idx_t listId) const;
79+
virtual idx_t getListLength(idx_t listId) const;
8080

8181
/// Return the encoded vector data contained in a particular inverted list,
8282
/// for debugging purposes.
8383
/// If gpuFormat is true, the data is returned as it is encoded in the
8484
/// GPU-side representation.
8585
/// Otherwise, it is converted to the CPU-compliant format; the native
8686
/// GPU format may differ.
87-
std::vector<uint8_t> getListVectorData(idx_t listId, bool gpuFormat = false)
88-
const;
87+
virtual std::vector<uint8_t> getListVectorData(
88+
idx_t listId,
89+
bool gpuFormat = false) const;
8990

9091
/// Return the vector indices contained in a particular inverted list, for
9192
/// debugging purposes.
92-
std::vector<idx_t> getListIndices(idx_t listId) const;
93+
virtual std::vector<idx_t> getListIndices(idx_t listId) const;
9394

9495
void search_preassigned(
9596
idx_t n,
@@ -121,7 +122,7 @@ class GpuIndexIVF : public GpuIndex, public IndexIVFInterface {
121122
int getCurrentNProbe_(const SearchParameters* params) const;
122123
void verifyIVFSettings_() const;
123124
bool addImplRequiresIDs_() const override;
124-
void trainQuantizer_(idx_t n, const float* x);
125+
virtual void trainQuantizer_(idx_t n, const float* x);
125126

126127
/// Called from GpuIndex for add/add_with_ids
127128
void addImpl_(idx_t n, const float* x, const idx_t* ids) override;

faiss/gpu/GpuIndexIVFFlat.cu

+83-20
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@
1515
#include <faiss/gpu/utils/CopyUtils.cuh>
1616
#include <faiss/gpu/utils/Float16.cuh>
1717

18+
#if defined USE_NVIDIA_RAFT
19+
#include <faiss/gpu/impl/RaftIVFFlat.cuh>
20+
#endif
21+
1822
#include <limits>
1923

2024
namespace faiss {
@@ -70,8 +74,7 @@ GpuIndexIVFFlat::GpuIndexIVFFlat(
7074
// no other quantizer that we need to train, so this is sufficient
7175
if (this->is_trained) {
7276
FAISS_ASSERT(this->quantizer);
73-
74-
index_.reset(new IVFFlat(
77+
set_index_(
7578
resources_.get(),
7679
this->d,
7780
this->nlist,
@@ -81,14 +84,62 @@ GpuIndexIVFFlat::GpuIndexIVFFlat(
8184
nullptr, // no scalar quantizer
8285
ivfFlatConfig_.interleavedLayout,
8386
ivfFlatConfig_.indicesOptions,
84-
config_.memorySpace));
87+
config_.memorySpace);
8588
baseIndex_ = std::static_pointer_cast<IVFBase, IVFFlat>(index_);
8689
updateQuantizer();
8790
}
8891
}
8992

9093
GpuIndexIVFFlat::~GpuIndexIVFFlat() {}
9194

95+
void GpuIndexIVFFlat::set_index_(
96+
GpuResources* resources,
97+
int dim,
98+
int nlist,
99+
faiss::MetricType metric,
100+
float metricArg,
101+
bool useResidual,
102+
/// Optional ScalarQuantizer
103+
faiss::ScalarQuantizer* scalarQ,
104+
bool interleavedLayout,
105+
IndicesOptions indicesOptions,
106+
MemorySpace space) {
107+
#if defined USE_NVIDIA_RAFT
108+
109+
if (config_.use_raft) {
110+
index_.reset(new RaftIVFFlat(
111+
resources,
112+
dim,
113+
nlist,
114+
metric,
115+
metricArg,
116+
useResidual,
117+
scalarQ,
118+
interleavedLayout,
119+
indicesOptions,
120+
space));
121+
} else
122+
#else
123+
if (config_.use_raft) {
124+
FAISS_THROW_MSG(
125+
"RAFT has not been compiled into the current version so it cannot be used.");
126+
} else
127+
#endif
128+
{
129+
index_.reset(new IVFFlat(
130+
resources,
131+
dim,
132+
nlist,
133+
metric,
134+
metricArg,
135+
useResidual,
136+
scalarQ,
137+
interleavedLayout,
138+
indicesOptions,
139+
space));
140+
}
141+
}
142+
92143
void GpuIndexIVFFlat::reserveMemory(size_t numVecs) {
93144
DeviceScope scope(config_.device);
94145

@@ -110,25 +161,25 @@ void GpuIndexIVFFlat::copyFrom(const faiss::IndexIVFFlat* index) {
110161

111162
// The other index might not be trained
112163
if (!index->is_trained) {
113-
FAISS_ASSERT(!this->is_trained);
164+
FAISS_ASSERT(!is_trained);
114165
return;
115166
}
116167

117168
// Otherwise, we can populate ourselves from the other index
118-
FAISS_ASSERT(this->is_trained);
169+
FAISS_ASSERT(is_trained);
119170

120171
// Copy our lists as well
121-
index_.reset(new IVFFlat(
172+
set_index_(
122173
resources_.get(),
123-
this->d,
124-
this->nlist,
174+
d,
175+
nlist,
125176
index->metric_type,
126177
index->metric_arg,
127178
false, // no residual
128179
nullptr, // no scalar quantizer
129180
ivfFlatConfig_.interleavedLayout,
130181
ivfFlatConfig_.indicesOptions,
131-
config_.memorySpace));
182+
config_.memorySpace);
132183
baseIndex_ = std::static_pointer_cast<IVFBase, IVFFlat>(index_);
133184
updateQuantizer();
134185

@@ -201,18 +252,30 @@ void GpuIndexIVFFlat::train(idx_t n, const float* x) {
201252

202253
FAISS_ASSERT(!index_);
203254

204-
// FIXME: GPUize more of this
205-
// First, make sure that the data is resident on the CPU, if it is not on
206-
// the CPU, as we depend upon parts of the CPU code
207-
auto hostData = toHost<float, 2>(
208-
(float*)x,
209-
resources_->getDefaultStream(config_.device),
210-
{n, this->d});
211-
212-
trainQuantizer_(n, hostData.data());
255+
#if defined USE_NVIDIA_RAFT
256+
if (config_.use_raft) {
257+
// No need to copy the data to host
258+
trainQuantizer_(n, x);
259+
} else
260+
#else
261+
if (config_.use_raft) {
262+
FAISS_THROW_MSG(
263+
"RAFT has not been compiled into the current version so it cannot be used.");
264+
} else
265+
#endif
266+
{
267+
// FIXME: GPUize more of this
268+
// First, make sure that the data is resident on the CPU, if it is not
269+
// on the CPU, as we depend upon parts of the CPU code
270+
auto hostData = toHost<float, 2>(
271+
(float*)x,
272+
resources_->getDefaultStream(config_.device),
273+
{n, this->d});
274+
trainQuantizer_(n, hostData.data());
275+
}
213276

214277
// The quantizer is now trained; construct the IVF index
215-
index_.reset(new IVFFlat(
278+
set_index_(
216279
resources_.get(),
217280
this->d,
218281
this->nlist,
@@ -222,7 +285,7 @@ void GpuIndexIVFFlat::train(idx_t n, const float* x) {
222285
nullptr, // no scalar quantizer
223286
ivfFlatConfig_.interleavedLayout,
224287
ivfFlatConfig_.indicesOptions,
225-
config_.memorySpace));
288+
config_.memorySpace);
226289
baseIndex_ = std::static_pointer_cast<IVFBase, IVFFlat>(index_);
227290
updateQuantizer();
228291

faiss/gpu/GpuIndexIVFFlat.h

+15
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
#pragma once
99

1010
#include <faiss/gpu/GpuIndexIVF.h>
11+
#include <faiss/impl/ScalarQuantizer.h>
12+
1113
#include <memory>
1214

1315
namespace faiss {
@@ -86,6 +88,19 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
8688
void train(idx_t n, const float* x) override;
8789

8890
protected:
91+
void set_index_(
92+
GpuResources* resources,
93+
int dim,
94+
int nlist,
95+
faiss::MetricType metric,
96+
float metricArg,
97+
bool useResidual,
98+
/// Optional ScalarQuantizer
99+
faiss::ScalarQuantizer* scalarQ,
100+
bool interleavedLayout,
101+
IndicesOptions indicesOptions,
102+
MemorySpace space);
103+
89104
/// Our configuration options
90105
const GpuIndexIVFFlatConfig ivfFlatConfig_;
91106

faiss/gpu/StandardGpuResources.cpp

+5-1
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,11 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
362362

363363
defaultStreams_[device] = defaultStream;
364364

365-
cudaStream_t asyncCopyStream = nullptr;
365+
#if defined USE_NVIDIA_RAFT
366+
raftHandles_.emplace(std::make_pair(device, defaultStream));
367+
#endif
368+
369+
cudaStream_t asyncCopyStream = 0;
366370
CUDA_VERIFY(
367371
cudaStreamCreateWithFlags(&asyncCopyStream, cudaStreamNonBlocking));
368372

0 commit comments

Comments
 (0)