Skip to content

Commit e8b2c7c

Browse files
mdouze authored and
facebook-github-bot committed
Support RAFT from python (#2864)
Summary: Pull Request resolved: #2864. Adds use_raft to the cloner options. Adds tests for the Python interface. Also continues the cleanup of data structures to set default arguments. Adds the flags GPU and NVIDIA_RAFT to get_compile_options(). Reviewed By: algoriddle. Differential Revision: D45943372. fbshipit-source-id: 276bedf7461e2f61a91ec72aa8695d97156e7fbe
1 parent 48d48a3 commit e8b2c7c

16 files changed

+149
-89
lines changed

faiss/gpu/GpuCloner.cpp

+9
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
121121
GpuIndexFlatConfig config;
122122
config.device = device;
123123
config.useFloat16 = useFloat16;
124+
config.use_raft = use_raft;
124125
return new GpuIndexFlat(provider, ifl, config);
125126
} else if (
126127
dynamic_cast<const IndexScalarQuantizer*>(index) &&
@@ -129,6 +130,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
129130
GpuIndexFlatConfig config;
130131
config.device = device;
131132
config.useFloat16 = true;
133+
FAISS_THROW_IF_NOT_MSG(
134+
!use_raft, "this type of index is not implemented for RAFT");
132135
GpuIndexFlat* gif = new GpuIndexFlat(
133136
provider, index->d, index->metric_type, config);
134137
// transfer data by blocks
@@ -146,6 +149,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
146149
config.device = device;
147150
config.indicesOptions = indicesOptions;
148151
config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
152+
FAISS_THROW_IF_NOT_MSG(
153+
!use_raft, "this type of index is not implemented for RAFT");
149154

150155
GpuIndexIVFFlat* res = new GpuIndexIVFFlat(
151156
provider, ifl->d, ifl->nlist, ifl->metric_type, config);
@@ -162,6 +167,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
162167
config.device = device;
163168
config.indicesOptions = indicesOptions;
164169
config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
170+
FAISS_THROW_IF_NOT_MSG(
171+
!use_raft, "this type of index is not implemented for RAFT");
165172

166173
GpuIndexIVFScalarQuantizer* res = new GpuIndexIVFScalarQuantizer(
167174
provider,
@@ -194,6 +201,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
194201
config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
195202
config.useFloat16LookupTables = useFloat16;
196203
config.usePrecomputedTables = usePrecomputed;
204+
FAISS_THROW_IF_NOT_MSG(
205+
!use_raft, "this type of index is not implemented for RAFT");
197206

198207
GpuIndexIVFPQ* res = new GpuIndexIVFPQ(provider, ipq, config);
199208

faiss/gpu/GpuClonerOptions.h

+3
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ struct GpuClonerOptions {
3636

3737
/// Set verbose options on the index
3838
bool verbose = false;
39+
40+
/// use the RAFT implementation
41+
bool use_raft = false;
3942
};
4043

4144
struct GpuMultipleClonerOptions : public GpuClonerOptions {

faiss/gpu/GpuDistance.h

+18-38
Original file line numberDiff line numberDiff line change
@@ -28,44 +28,24 @@ enum class IndicesDataType {
2828

2929
/// Arguments to brute-force GPU k-nearest neighbor searching
3030
struct GpuDistanceParams {
31-
GpuDistanceParams()
32-
: metric(faiss::MetricType::METRIC_L2),
33-
metricArg(0),
34-
k(0),
35-
dims(0),
36-
vectors(nullptr),
37-
vectorType(DistanceDataType::F32),
38-
vectorsRowMajor(true),
39-
numVectors(0),
40-
vectorNorms(nullptr),
41-
queries(nullptr),
42-
queryType(DistanceDataType::F32),
43-
queriesRowMajor(true),
44-
numQueries(0),
45-
outDistances(nullptr),
46-
ignoreOutDistances(false),
47-
outIndicesType(IndicesDataType::I64),
48-
outIndices(nullptr),
49-
device(-1) {}
50-
5131
//
5232
// Search parameters
5333
//
5434

5535
/// Search parameter: distance metric
56-
faiss::MetricType metric;
36+
faiss::MetricType metric = METRIC_L2;
5737

5838
/// Search parameter: distance metric argument (if applicable)
5939
/// For metric == METRIC_Lp, this is the p-value
60-
float metricArg;
40+
float metricArg = 0;
6141

6242
/// Search parameter: return k nearest neighbors
6343
/// If the value provided is -1, then we report all pairwise distances
6444
/// without top-k filtering
65-
int k;
45+
int k = 0;
6646

6747
/// Vector dimensionality
68-
int dims;
48+
int dims = 0;
6949

7050
//
7151
// Vectors being queried
@@ -74,14 +54,14 @@ struct GpuDistanceParams {
7454
/// If vectorsRowMajor is true, this is
7555
/// numVectors x dims, with dims innermost; otherwise,
7656
/// dims x numVectors, with numVectors innermost
77-
const void* vectors;
78-
DistanceDataType vectorType;
79-
bool vectorsRowMajor;
80-
idx_t numVectors;
57+
const void* vectors = nullptr;
58+
DistanceDataType vectorType = DistanceDataType::F32;
59+
bool vectorsRowMajor = true;
60+
idx_t numVectors = 0;
8161

8262
/// Precomputed L2 norms for each vector in `vectors`, which can be
8363
/// optionally provided in advance to speed computation for METRIC_L2
84-
const float* vectorNorms;
64+
const float* vectorNorms = nullptr;
8565

8666
//
8767
// The query vectors (i.e., find k-nearest neighbors in `vectors` for each
@@ -91,10 +71,10 @@ struct GpuDistanceParams {
9171
/// If queriesRowMajor is true, this is
9272
/// numQueries x dims, with dims innermost; otherwise,
9373
/// dims x numQueries, with numQueries innermost
94-
const void* queries;
95-
DistanceDataType queryType;
96-
bool queriesRowMajor;
97-
idx_t numQueries;
74+
const void* queries = nullptr;
75+
DistanceDataType queryType = DistanceDataType::F32;
76+
bool queriesRowMajor = true;
77+
idx_t numQueries = 0;
9878

9979
//
10080
// Output results
@@ -103,16 +83,16 @@ struct GpuDistanceParams {
10383
/// A region of memory size numQueries x k, with k
10484
/// innermost (row major) if k > 0, or if k == -1, a region of memory of
10585
/// size numQueries x numVectors
106-
float* outDistances;
86+
float* outDistances = nullptr;
10787

10888
/// Do we only care about the indices reported, rather than the output
10989
/// distances? Not used if k == -1 (all pairwise distances)
110-
bool ignoreOutDistances;
90+
bool ignoreOutDistances = false;
11191

11292
/// A region of memory size numQueries x k, with k
11393
/// innermost (row major). Not used if k == -1 (all pairwise distances)
114-
IndicesDataType outIndicesType;
115-
void* outIndices;
94+
IndicesDataType outIndicesType = IndicesDataType::I64;
95+
void* outIndices = nullptr;
11696

11797
//
11898
// Execution information
@@ -123,7 +103,7 @@ struct GpuDistanceParams {
123103
/// (via cudaGetDevice/cudaSetDevice) is used
124104
/// Otherwise, an integer 0 <= device < numDevices indicates the device for
125105
/// execution
126-
int device;
106+
int device = -1;
127107

128108
/// Should the index dispatch down to RAFT?
129109
bool use_raft = false;

faiss/gpu/GpuIndex.cu

+17
Original file line numberDiff line numberDiff line change
@@ -514,4 +514,21 @@ bool isGpuIndexImplemented(faiss::Index* index) {
514514
}
515515

516516
} // namespace gpu
517+
518+
// This is the one defined in utils.cpp
519+
// Crossing fingers that the InitGpuOptions_instance will
520+
// be instantiated after this global variable
521+
extern std::string gpu_options;
522+
523+
struct InitGpuOptions {
524+
InitGpuOptions() {
525+
gpu_options = "GPU ";
526+
#ifdef USE_NVIDIA_RAFT
527+
gpu_options += "NVIDIA_RAFT ";
528+
#endif
529+
}
530+
};
531+
532+
InitGpuOptions InitGpuOptions_instance;
533+
517534
} // namespace faiss

faiss/gpu/GpuIndex.h

+2-4
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,13 @@ namespace faiss {
2929
namespace gpu {
3030

3131
struct GpuIndexConfig {
32-
inline GpuIndexConfig() : device(0), memorySpace(MemorySpace::Device) {}
33-
3432
/// GPU device on which the index is resident
35-
int device;
33+
int device = 0;
3634

3735
/// What memory space to use for primary storage.
3836
/// On Pascal and above (CC 6+) architectures, allows GPUs to use
3937
/// more memory than is available on the GPU.
40-
MemorySpace memorySpace;
38+
MemorySpace memorySpace = MemorySpace::Device;
4139

4240
/// Should the index dispatch down to RAFT?
4341
bool use_raft = false;

faiss/gpu/GpuIndexFlat.h

+2-4
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,13 @@ namespace gpu {
2424
class FlatIndex;
2525

2626
struct GpuIndexFlatConfig : public GpuIndexConfig {
27-
inline GpuIndexFlatConfig() : useFloat16(false) {}
28-
2927
/// Whether or not data is stored as float16
30-
bool useFloat16;
28+
bool useFloat16 = false;
3129

3230
/// Deprecated: no longer used
3331
/// Previously used to indicate whether internal storage of vectors is
3432
/// transposed
35-
bool storeTransposed;
33+
bool storeTransposed = false;
3634
};
3735

3836
/// Wrapper around the GPU implementation that looks like

faiss/gpu/GpuIndexIVF.h

+1-3
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,8 @@ class GpuIndexFlat;
2121
class IVFBase;
2222

2323
struct GpuIndexIVFConfig : public GpuIndexConfig {
24-
inline GpuIndexIVFConfig() : indicesOptions(INDICES_64_BIT) {}
25-
2624
/// Index storage options for the GPU
27-
IndicesOptions indicesOptions;
25+
IndicesOptions indicesOptions = INDICES_64_BIT;
2826

2927
/// Configuration for the coarse quantizer object
3028
GpuIndexFlatConfig flatConfig;

faiss/gpu/GpuIndexIVFFlat.h

+1-3
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,9 @@ class IVFFlat;
2121
class GpuIndexFlat;
2222

2323
struct GpuIndexIVFFlatConfig : public GpuIndexIVFConfig {
24-
inline GpuIndexIVFFlatConfig() : interleavedLayout(true) {}
25-
2624
/// Use the alternative memory layout for the IVF lists
2725
/// (currently the default)
28-
bool interleavedLayout;
26+
bool interleavedLayout = true;
2927
};
3028

3129
/// Wrapper around the GPU implementation that looks like

faiss/gpu/GpuIndexIVFPQ.h

+4-10
Original file line numberDiff line numberDiff line change
@@ -23,24 +23,18 @@ class GpuIndexFlat;
2323
class IVFPQ;
2424

2525
struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
26-
inline GpuIndexIVFPQConfig()
27-
: useFloat16LookupTables(false),
28-
usePrecomputedTables(false),
29-
interleavedLayout(false),
30-
useMMCodeDistance(false) {}
31-
3226
/// Whether or not float16 residual distance tables are used in the
3327
/// list scanning kernels. When subQuantizers * 2^bitsPerCode >
3428
/// 16384, this is required.
35-
bool useFloat16LookupTables;
29+
bool useFloat16LookupTables = false;
3630

3731
/// Whether or not we enable the precomputed table option for
3832
/// search, which can substantially increase the memory requirement.
39-
bool usePrecomputedTables;
33+
bool usePrecomputedTables = false;
4034

4135
/// Use the alternative memory layout for the IVF lists
4236
/// WARNING: this is a feature under development, do not use!
43-
bool interleavedLayout;
37+
bool interleavedLayout = false;
4438

4539
/// Use GEMM-backed computation of PQ code distances for the no precomputed
4640
/// table version of IVFPQ.
@@ -50,7 +44,7 @@ struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
5044
/// Note that MM code distance is enabled automatically if one uses a number
5145
/// of dimensions per sub-quantizer that is not natively specialized (an odd
5246
/// number like 7 or so).
53-
bool useMMCodeDistance;
47+
bool useMMCodeDistance = false;
5448
};
5549

5650
/// IVFPQ index for the GPU

faiss/gpu/GpuIndexIVFScalarQuantizer.h

+1-3
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,9 @@ class IVFFlat;
1818
class GpuIndexFlat;
1919

2020
struct GpuIndexIVFScalarQuantizerConfig : public GpuIndexIVFConfig {
21-
inline GpuIndexIVFScalarQuantizerConfig() : interleavedLayout(true) {}
22-
2321
/// Use the alternative memory layout for the IVF lists
2422
/// (currently the default)
25-
bool interleavedLayout;
23+
bool interleavedLayout = true;
2624
};
2725

2826
/// Wrapper around the GPU implementation that looks like

faiss/gpu/GpuResources.h

+7-11
Original file line numberDiff line numberDiff line change
@@ -102,11 +102,7 @@ std::string memorySpaceToString(MemorySpace s);
102102

103103
/// Information on what/where an allocation is
104104
struct AllocInfo {
105-
inline AllocInfo()
106-
: type(AllocType::Other),
107-
device(0),
108-
space(MemorySpace::Device),
109-
stream(nullptr) {}
105+
inline AllocInfo() {}
110106

111107
inline AllocInfo(AllocType at, int dev, MemorySpace sp, cudaStream_t st)
112108
: type(at), device(dev), space(sp), stream(st) {}
@@ -115,13 +111,13 @@ struct AllocInfo {
115111
std::string toString() const;
116112

117113
/// The internal category of the allocation
118-
AllocType type;
114+
AllocType type = AllocType::Other;
119115

120116
/// The device on which the allocation is happening
121-
int device;
117+
int device = 0;
122118

123119
/// The memory space of the allocation
124-
MemorySpace space;
120+
MemorySpace space = MemorySpace::Device;
125121

126122
/// The stream on which new work on the memory will be ordered (e.g., if a
127123
/// piece of memory cached and to be returned for this call was last used on
@@ -131,7 +127,7 @@ struct AllocInfo {
131127
///
132128
/// The memory manager guarantees that the returned memory is free to use
133129
/// without data races on this stream specified.
134-
cudaStream_t stream;
130+
cudaStream_t stream = nullptr;
135131
};
136132

137133
/// Create an AllocInfo for the current device with MemorySpace::Device
@@ -145,7 +141,7 @@ AllocInfo makeSpaceAlloc(AllocType at, MemorySpace sp, cudaStream_t st);
145141

146142
/// Information on what/where an allocation is, along with how big it should be
147143
struct AllocRequest : public AllocInfo {
148-
inline AllocRequest() : AllocInfo(), size(0) {}
144+
inline AllocRequest() {}
149145

150146
inline AllocRequest(const AllocInfo& info, size_t sz)
151147
: AllocInfo(info), size(sz) {}
@@ -162,7 +158,7 @@ struct AllocRequest : public AllocInfo {
162158
std::string toString() const;
163159

164160
/// The size in bytes of the allocation
165-
size_t size;
161+
size_t size = 0;
166162
};
167163

168164
/// A RAII object that manages a temporary memory request

faiss/gpu/test/test_gpu_basics.py

+6
Original file line numberDiff line numberDiff line change
@@ -426,3 +426,9 @@ def test_with_gpu(self):
426426
self.assertTrue(0.9 * err_rq0 < err_rq1 < 1.1 * err_rq0)
427427

428428
# np.testing.assert_array_equal(codes0, codes1)
429+
430+
431+
class TestGpuFlags(unittest.TestCase):
432+
433+
def test_gpu_flag(self):
434+
assert "GPU" in faiss.get_compile_options().split()

0 commit comments

Comments
 (0)