From 7498aaedaf5fe28c06d6c27bc7cc33dad376cd88 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Tue, 10 Dec 2024 11:37:04 -0800
Subject: [PATCH 01/51] first commit

---
 INSTALL.md | 7 ++++---
 README.md  | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/INSTALL.md b/INSTALL.md
index e16de484fe..68fb3752fd 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -37,8 +37,8 @@ $ conda install -c pytorch/label/nightly faiss-cpu
 # GPU(+CPU) version
 $ conda install -c pytorch/label/nightly -c nvidia faiss-gpu=1.9.0
 
-# GPU(+CPU) version with NVIDIA RAFT
-conda install -c pytorch -c nvidia -c rapidsai -c conda-forge faiss-gpu-raft=1.9.0 pytorch pytorch-cuda numpy
+# GPU(+CPU) version with NVIDIA cuVS (nightly)
+conda install -c pytorch -c rapidsai -c conda-forge faiss-gpu-cuvs pytorch pytorch-cuda numpy
 
 # GPU(+CPU) version using AMD ROCm not yet available
 ```
@@ -119,8 +119,9 @@ Several options can be passed to CMake, among which:
   - `-DFAISS_ENABLE_PYTHON=OFF` in order to disable building python bindings
   (possible values are `ON` and `OFF`),
   - `-DFAISS_ENABLE_CUVS=ON` in order to enable building the cuVS implementations
-    of the IVF-Flat and IVF-PQ GPU-accelerated indices (default is `OFF`, possible
+    of the IVF-Flat, IVF-PQ and CAGRA GPU-accelerated indices (default is `ON`, possible
     values are `ON` and `OFF`)
+  `-DFAISS_ENABLE_GPU` must be `ON` when using this option. (possible values are `ON` and `OFF`),
   - `-DBUILD_TESTING=OFF` in order to disable building C++ tests,
   - `-DBUILD_SHARED_LIBS=ON` in order to build a shared library (possible values
   are `ON` and `OFF`),
diff --git a/README.md b/README.md
index f00f4d7a3c..c10fbb4ca4 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ The GPU implementation can accept input from either CPU or GPU memory. On a serv
 
 ## Installing
 
-Faiss comes with precompiled libraries for Anaconda in Python, see [faiss-cpu](https://anaconda.org/pytorch/faiss-cpu) and [faiss-gpu](https://anaconda.org/pytorch/faiss-gpu). The library is mostly implemented in C++, the only dependency is a [BLAS](https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms) implementation. Optional GPU support is provided via CUDA or AMD ROCm, and the Python interface is also optional. It compiles with cmake. See [INSTALL.md](INSTALL.md) for details.
+Faiss comes with precompiled libraries for Anaconda in Python, see [faiss-cpu](https://anaconda.org/pytorch/faiss-cpu), [faiss-gpu](https://anaconda.org/pytorch/faiss-gpu) and [faiss-gpu-cuvs](https://anaconda.org/pytorch/faiss-gpu-cuvs). The library is mostly implemented in C++, the only dependency is a [BLAS](https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms) implementation. Optional GPU support is provided via CUDA or AMD ROCm, and the Python interface is also optional. The backend implementations GPU implementations of NVIDIA [cuVS](https://github.com/rapidsai/cuvs) can also be enabled optionally. It compiles with cmake. See [INSTALL.md](INSTALL.md) for details.
 
 ## How Faiss works
 

From 110a9b0f9dc0915c87b0aec7c5ac48a95720ada3 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Thu, 12 Dec 2024 10:09:38 -0800
Subject: [PATCH 02/51] updates

---
 INSTALL.md                    | 6 +++---
 README.md                     | 2 +-
 faiss/gpu/impl/CuvsIVFFlat.cu | 5 ++++-
 faiss/gpu/impl/CuvsIVFPQ.cu   | 5 ++++-
 4 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/INSTALL.md b/INSTALL.md
index 68fb3752fd..0eb066afe5 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -6,7 +6,7 @@ pre-release nightly builds.
 
 - The CPU-only faiss-cpu conda package is currently available on Linux (x86-64 and aarch64), OSX (arm64 only), and Windows (x86-64)
 - faiss-gpu, containing both CPU and GPU indices, is available on Linux (x86-64 only) for CUDA 11.4 and 12.1
-- faiss-gpu-raft containing both CPU and GPU indices provided by NVIDIA RAFT, is available on Linux (x86-64 only) for CUDA 11.8 and 12.1.
+- faiss-gpu-cuvs containing GPU indices provided by NVIDIA cuVS, is available on Linux (x86-64 only) for CUDA 11.8 and 12.4.
 
 To install the latest stable release:
 
@@ -26,7 +26,7 @@ $ conda install -c pytorch -c nvidia -c rapidsai -c conda-forge faiss-gpu-raft=1
 For faiss-gpu, the nvidia channel is required for CUDA, which is not
 published in the main anaconda channel.
 
-For faiss-gpu-raft, the nvidia, rapidsai and conda-forge channels are required.
+For faiss-gpu-cuvs, the rapidsai and conda-forge channels are required.
 
 Nightly pre-release packages can be installed as follows:
 
@@ -37,7 +37,7 @@ $ conda install -c pytorch/label/nightly faiss-cpu
 # GPU(+CPU) version
 $ conda install -c pytorch/label/nightly -c nvidia faiss-gpu=1.9.0
 
-# GPU(+CPU) version with NVIDIA cuVS (nightly)
+# GPU(+CPU) version with NVIDIA cuVS
 conda install -c pytorch -c rapidsai -c conda-forge faiss-gpu-cuvs pytorch pytorch-cuda numpy
 
 # GPU(+CPU) version using AMD ROCm not yet available
diff --git a/README.md b/README.md
index c10fbb4ca4..468ba59ab6 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ The GPU implementation can accept input from either CPU or GPU memory. On a serv
 
 ## Installing
 
-Faiss comes with precompiled libraries for Anaconda in Python, see [faiss-cpu](https://anaconda.org/pytorch/faiss-cpu), [faiss-gpu](https://anaconda.org/pytorch/faiss-gpu) and [faiss-gpu-cuvs](https://anaconda.org/pytorch/faiss-gpu-cuvs). The library is mostly implemented in C++, the only dependency is a [BLAS](https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms) implementation. Optional GPU support is provided via CUDA or AMD ROCm, and the Python interface is also optional. The backend implementations GPU implementations of NVIDIA [cuVS](https://github.com/rapidsai/cuvs) can also be enabled optionally. It compiles with cmake. See [INSTALL.md](INSTALL.md) for details.
+Faiss comes with precompiled libraries for Anaconda in Python, see [faiss-cpu](https://anaconda.org/pytorch/faiss-cpu), [faiss-gpu](https://anaconda.org/pytorch/faiss-gpu) and [faiss-gpu-cuvs](https://anaconda.org/pytorch/faiss-gpu-cuvs). The library is mostly implemented in C++, the only dependency is a [BLAS](https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms) implementation. Optional GPU support is provided via CUDA or AMD ROCm, and the Python interface is also optional. The backend GPU implementations of NVIDIA [cuVS](https://github.com/rapidsai/cuvs) can also be enabled optionally. It compiles with cmake. See [INSTALL.md](INSTALL.md) for details.
 
 ## How Faiss works
 
diff --git a/faiss/gpu/impl/CuvsIVFFlat.cu b/faiss/gpu/impl/CuvsIVFFlat.cu
index 0de7100c72..bf67eb938b 100644
--- a/faiss/gpu/impl/CuvsIVFFlat.cu
+++ b/faiss/gpu/impl/CuvsIVFFlat.cu
@@ -135,14 +135,16 @@ void CuvsIVFFlat::search(
 
     validRowIndices(resources_, queries, nan_flag.data_handle());
 
+    faiss::idx_t max_ind = std::numeric_limits<faiss::idx_t>::max();
     raft::linalg::map_offset(
             raft_handle,
             raft::make_device_vector_view(outIndices.data(), numQueries * k_),
             [nan_flag = nan_flag.data_handle(),
              out_inds = outIndices.data(),
+             max_ind,
              k_] __device__(uint32_t i) {
                 uint32_t row = i / k_;
-                if (!nan_flag[row])
+                if (!nan_flag[row] || out_inds[i] == max_ind)
                     return idx_t(-1);
                 return out_inds[i];
             });
@@ -291,6 +293,7 @@ void CuvsIVFFlat::searchPreassigned(
         Tensor<idx_t, 2, true>& outIndices,
         bool storePairs) {
     // TODO: Fill this in!
+    FAISS_THROW_MSG("searchPreassigned is not implemented for cuVS index");
 }
 
 void CuvsIVFFlat::updateQuantizer(Index* quantizer) {
diff --git a/faiss/gpu/impl/CuvsIVFPQ.cu b/faiss/gpu/impl/CuvsIVFPQ.cu
index 2fc94de0f0..9de199bbb6 100644
--- a/faiss/gpu/impl/CuvsIVFPQ.cu
+++ b/faiss/gpu/impl/CuvsIVFPQ.cu
@@ -229,6 +229,7 @@ void CuvsIVFPQ::searchPreassigned(
         Tensor<idx_t, 2, true>& outIndices,
         bool storePairs) {
     // TODO: Fill this in!
+    FAISS_THROW_MSG("searchPreassigned is not implemented for cuVS index");
 }
 
 size_t CuvsIVFPQ::getGpuListEncodingSize_(idx_t listId) {
@@ -331,14 +332,16 @@ void CuvsIVFPQ::search(
 
     validRowIndices(resources_, queries, nan_flag.data_handle());
 
+    faiss::idx_t max_ind = std::numeric_limits<faiss::idx_t>::max();
     raft::linalg::map_offset(
             raft_handle,
             raft::make_device_vector_view(outIndices.data(), numQueries * k_),
             [nan_flag = nan_flag.data_handle(),
              out_inds = outIndices.data(),
+             max_ind,
              k_] __device__(uint32_t i) {
                 uint32_t row = i / k_;
-                if (!nan_flag[row])
+                if (!nan_flag[row] || out_inds[i] == max_ind)
                     return idx_t(-1);
                 return out_inds[i];
             });

From 73a09836a89dd4b3e5a2da23cd637f3ebecb02b2 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Thu, 12 Dec 2024 10:12:10 -0800
Subject: [PATCH 03/51] reinstate nvidia channel for cuda11.8

---
 .github/actions/build_conda/action.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/actions/build_conda/action.yml b/.github/actions/build_conda/action.yml
index bf84a38496..9cbe0371a3 100644
--- a/.github/actions/build_conda/action.yml
+++ b/.github/actions/build_conda/action.yml
@@ -80,7 +80,7 @@ runs:
       working-directory: conda
       run: |
         conda build faiss-gpu-cuvs --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \
-            -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge
+            -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge -c nvidia
     - name: Conda build (GPU w/ cuVS) w/ anaconda upload
       if: inputs.label != '' && inputs.cuda != '' && inputs.cuvs != ''
       shell: ${{ steps.choose_shell.outputs.shell }}
@@ -89,4 +89,4 @@ runs:
         PACKAGE_TYPE: ${{ inputs.label }}
       run: |
         conda build faiss-gpu-cuvs --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \
-            --user pytorch --label ${{ inputs.label }} -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge
+            --user pytorch --label ${{ inputs.label }} -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge -c nvidia

From f2bf7f3ec2839fb374519f590463a59d98a2da9e Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Thu, 12 Dec 2024 10:15:12 -0800
Subject: [PATCH 04/51] build conda workflows

---
 .github/workflows/build-pull-request.yml | 30 ++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/.github/workflows/build-pull-request.yml b/.github/workflows/build-pull-request.yml
index d94abba081..f94077513d 100644
--- a/.github/workflows/build-pull-request.yml
+++ b/.github/workflows/build-pull-request.yml
@@ -132,6 +132,36 @@ jobs:
           fetch-tags: true
       - name: Build and Package (conda)
         uses: ./.github/actions/build_conda
+  linux-x86_64-GPU-CUVS-CUDA11-8-0-conda:
+    name: Linux x86_64 GPU w/ cuVS conda (CUDA 11.8.0)
+    runs-on: 4-core-ubuntu-gpu-t4
+    env:
+      CUDA_ARCHS: "70-real;72-real;75-real;80;86-real"
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          fetch-tags: true
+      - uses: ./.github/actions/build_conda
+        with:
+          cuvs: "ON"
+          cuda: "11.8.0"
+  linux-x86_64-GPU-CUVS-CUDA12-4-0-conda:
+    name: Linux x86_64 GPU w/ cuVS conda (CUDA 12.4.0)
+    runs-on: 4-core-ubuntu-gpu-t4
+    env:
+      CUDA_ARCHS: "70-real;72-real;75-real;80;86-real"
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          fetch-tags: true
+      - uses: ./.github/actions/build_conda
+        with:
+          cuvs: "ON"
+          cuda: "12.4.0"
   windows-x86_64-conda:
     name: Windows x86_64 (conda)
     needs: linux-x86_64-cmake

From c93f02e3a6641c172fe5443ad1db454c65ae3df7 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Thu, 12 Dec 2024 10:49:42 -0800
Subject: [PATCH 05/51] force cuda-nvcc from conda-forge

---
 .github/actions/build_conda/action.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/actions/build_conda/action.yml b/.github/actions/build_conda/action.yml
index 9cbe0371a3..fb94cf5003 100644
--- a/.github/actions/build_conda/action.yml
+++ b/.github/actions/build_conda/action.yml
@@ -80,7 +80,7 @@ runs:
       working-directory: conda
       run: |
         conda build faiss-gpu-cuvs --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \
-            -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge -c nvidia
+            --depends conda-forge::cuda-nvcc -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge -c nvidia
     - name: Conda build (GPU w/ cuVS) w/ anaconda upload
       if: inputs.label != '' && inputs.cuda != '' && inputs.cuvs != ''
       shell: ${{ steps.choose_shell.outputs.shell }}
@@ -89,4 +89,4 @@ runs:
         PACKAGE_TYPE: ${{ inputs.label }}
       run: |
         conda build faiss-gpu-cuvs --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \
-            --user pytorch --label ${{ inputs.label }} -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge -c nvidia
+            --user pytorch --label ${{ inputs.label }} --depends conda-forge::cuda-nvcc -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge -c nvidia

From 893229f746ea3eb0a01128561de51bec0c5bb0eb Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Thu, 12 Dec 2024 10:59:08 -0800
Subject: [PATCH 06/51] override with conda-forge

---
 .github/actions/build_conda/action.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/actions/build_conda/action.yml b/.github/actions/build_conda/action.yml
index fb94cf5003..e8ea594468 100644
--- a/.github/actions/build_conda/action.yml
+++ b/.github/actions/build_conda/action.yml
@@ -80,7 +80,7 @@ runs:
       working-directory: conda
       run: |
         conda build faiss-gpu-cuvs --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \
-            --depends conda-forge::cuda-nvcc -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge -c nvidia
+            --override-channels conda-forge -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge -c nvidia
     - name: Conda build (GPU w/ cuVS) w/ anaconda upload
       if: inputs.label != '' && inputs.cuda != '' && inputs.cuvs != ''
       shell: ${{ steps.choose_shell.outputs.shell }}
@@ -89,4 +89,4 @@ runs:
         PACKAGE_TYPE: ${{ inputs.label }}
       run: |
         conda build faiss-gpu-cuvs --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \
-            --user pytorch --label ${{ inputs.label }} --depends conda-forge::cuda-nvcc -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge -c nvidia
+            --user pytorch --label ${{ inputs.label }}--override-channels conda-forge -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge -c nvidia

From 84ca295915f04503f455ba0073f8ced205066bad Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Thu, 12 Dec 2024 11:11:36 -0800
Subject: [PATCH 07/51] install cuda-profiler-api separately

---
 .github/actions/build_conda/action.yml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/actions/build_conda/action.yml b/.github/actions/build_conda/action.yml
index e8ea594468..84bd15b29d 100644
--- a/.github/actions/build_conda/action.yml
+++ b/.github/actions/build_conda/action.yml
@@ -79,8 +79,9 @@ runs:
       shell: ${{ steps.choose_shell.outputs.shell }}
       working-directory: conda
       run: |
+        conda install -c nvidia cuda-profiler-api=${{ inputs.cuda }}
         conda build faiss-gpu-cuvs --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \
-            --override-channels conda-forge -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge -c nvidia
+            -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge
     - name: Conda build (GPU w/ cuVS) w/ anaconda upload
       if: inputs.label != '' && inputs.cuda != '' && inputs.cuvs != ''
       shell: ${{ steps.choose_shell.outputs.shell }}
@@ -88,5 +89,6 @@ runs:
       env:
         PACKAGE_TYPE: ${{ inputs.label }}
       run: |
+        conda install -c nvidia cuda-profiler-api=${{ inputs.cuda }}
         conda build faiss-gpu-cuvs --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \
-            --user pytorch --label ${{ inputs.label }}--override-channels conda-forge -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge -c nvidia
+            --user pytorch --label ${{ inputs.label }} -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge

From 1b660b020a23a6886f6cebb11520cf1e16b93bc8 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Thu, 12 Dec 2024 11:20:00 -0800
Subject: [PATCH 08/51] label channel

---
 .github/actions/build_conda/action.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/actions/build_conda/action.yml b/.github/actions/build_conda/action.yml
index 84bd15b29d..55145e7a77 100644
--- a/.github/actions/build_conda/action.yml
+++ b/.github/actions/build_conda/action.yml
@@ -79,7 +79,7 @@ runs:
       shell: ${{ steps.choose_shell.outputs.shell }}
       working-directory: conda
       run: |
-        conda install -c nvidia cuda-profiler-api=${{ inputs.cuda }}
+        conda install -c nvidia/label/cuda-${{ inputs.cuda }} cuda-profiler-api
         conda build faiss-gpu-cuvs --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \
             -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge
     - name: Conda build (GPU w/ cuVS) w/ anaconda upload
@@ -89,6 +89,6 @@ runs:
       env:
         PACKAGE_TYPE: ${{ inputs.label }}
       run: |
-        conda install -c nvidia cuda-profiler-api=${{ inputs.cuda }}
+        conda install -c -c nvidia/label/cuda-${{ inputs.cuda }} cuda-profiler-api
         conda build faiss-gpu-cuvs --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \
             --user pytorch --label ${{ inputs.label }} -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge

From 45af2a65a187c3c19e8349f28de21bd154705e86 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Thu, 12 Dec 2024 11:51:09 -0800
Subject: [PATCH 09/51] reinstate nvidia channel

---
 .github/actions/build_conda/action.yml | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/.github/actions/build_conda/action.yml b/.github/actions/build_conda/action.yml
index 55145e7a77..9cbe0371a3 100644
--- a/.github/actions/build_conda/action.yml
+++ b/.github/actions/build_conda/action.yml
@@ -79,9 +79,8 @@ runs:
       shell: ${{ steps.choose_shell.outputs.shell }}
       working-directory: conda
       run: |
-        conda install -c nvidia/label/cuda-${{ inputs.cuda }} cuda-profiler-api
         conda build faiss-gpu-cuvs --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \
-            -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge
+            -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge -c nvidia
     - name: Conda build (GPU w/ cuVS) w/ anaconda upload
       if: inputs.label != '' && inputs.cuda != '' && inputs.cuvs != ''
       shell: ${{ steps.choose_shell.outputs.shell }}
@@ -89,6 +88,5 @@ runs:
       env:
         PACKAGE_TYPE: ${{ inputs.label }}
       run: |
-        conda install -c -c nvidia/label/cuda-${{ inputs.cuda }} cuda-profiler-api
         conda build faiss-gpu-cuvs --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \
-            --user pytorch --label ${{ inputs.label }} -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge
+            --user pytorch --label ${{ inputs.label }} -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge -c nvidia

From 4c57b83851ec027b6dc2f26cd6a8ed17d1601cd5 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Thu, 12 Dec 2024 12:12:51 -0800
Subject: [PATCH 10/51] cuda-runtime host

---
 conda/faiss-gpu-cuvs/meta.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/conda/faiss-gpu-cuvs/meta.yaml b/conda/faiss-gpu-cuvs/meta.yaml
index e32099085e..dcdbd2326c 100644
--- a/conda/faiss-gpu-cuvs/meta.yaml
+++ b/conda/faiss-gpu-cuvs/meta.yaml
@@ -60,6 +60,7 @@ outputs:
         - openblas  # [not x86_64]
         - libcuvs =24.08
         - cuda-version {{ cudatoolkit }}
+        - cuda-runtime {{ cudatoolkit }}
       run:
         - _openmp_mutex =4.5=2_kmp_llvm  # [x86_64]
         - mkl =2023  # [x86_64]

From 09e50c38410ed3e7f7825117250ad63c646a8926 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Thu, 12 Dec 2024 12:26:59 -0800
Subject: [PATCH 11/51] cudatoolkit dep

---
 conda/faiss-gpu-cuvs/meta.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conda/faiss-gpu-cuvs/meta.yaml b/conda/faiss-gpu-cuvs/meta.yaml
index dcdbd2326c..f7e381890e 100644
--- a/conda/faiss-gpu-cuvs/meta.yaml
+++ b/conda/faiss-gpu-cuvs/meta.yaml
@@ -60,7 +60,7 @@ outputs:
         - openblas  # [not x86_64]
         - libcuvs =24.08
         - cuda-version {{ cudatoolkit }}
-        - cuda-runtime {{ cudatoolkit }}
+        - cudatoolkit {{ cudatoolkit }}
       run:
         - _openmp_mutex =4.5=2_kmp_llvm  # [x86_64]
         - mkl =2023  # [x86_64]

From 9ab52154c23062377f2d5f59f1e145d7df79b2b3 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Thu, 12 Dec 2024 13:04:43 -0800
Subject: [PATCH 12/51] update deps

---
 conda/faiss-gpu-cuvs/meta.yaml | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/conda/faiss-gpu-cuvs/meta.yaml b/conda/faiss-gpu-cuvs/meta.yaml
index f7e381890e..05cc593332 100644
--- a/conda/faiss-gpu-cuvs/meta.yaml
+++ b/conda/faiss-gpu-cuvs/meta.yaml
@@ -46,6 +46,11 @@ outputs:
     requirements:
       build:
         - {{ compiler('cxx') }}
+        {% if cudatoolkit == '11.8.0' %}
+        - {{ compiler('cuda11') }} ={{ cudatoolkit }}
+        {% else %}
+        - {{ compiler('cuda') }}
+        {% endif %}
         - sysroot_linux-64  # [linux64]
         - llvm-openmp  # [osx]
         - cmake >=3.26.4
@@ -54,18 +59,25 @@ outputs:
         - mkl =2023  # [x86_64]
         - mkl-devel =2023  # [x86_64]
         - cuda-toolkit {{ cudatoolkit }}
+        - cuda-version {{ cudatoolkit }}
       host:
         - _openmp_mutex =4.5=2_kmp_llvm  # [x86_64]
         - mkl =2023  # [x86_64]
         - openblas  # [not x86_64]
         - libcuvs =24.08
         - cuda-version {{ cudatoolkit }}
-        - cudatoolkit {{ cudatoolkit }}
+        {% if cuda_major == "11" %}
+        - cuda-profiler-api =11.8.86
+        {% endif %}
       run:
         - _openmp_mutex =4.5=2_kmp_llvm  # [x86_64]
         - mkl =2023  # [x86_64]
         - openblas  # [not x86_64]
-        - cuda-cudart {{ cuda_constraints }}
+        {% if cudatoolkit == '11.8.0' %}
+        - cudatoolkit
+        {% else %}
+        - cuda-cudart
+        {% endif %}
         - libcublas {{ libcublas_constraints }}
         - libcuvs =24.08
         - cuda-version {{ cuda_constraints }}

From 3134c5c8404809b66f5db27a58cd4549d96e2c89 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Thu, 12 Dec 2024 13:07:22 -0800
Subject: [PATCH 13/51] rm syntax error

---
 conda/faiss-gpu-cuvs/meta.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conda/faiss-gpu-cuvs/meta.yaml b/conda/faiss-gpu-cuvs/meta.yaml
index 05cc593332..18cfb56d2f 100644
--- a/conda/faiss-gpu-cuvs/meta.yaml
+++ b/conda/faiss-gpu-cuvs/meta.yaml
@@ -66,7 +66,7 @@ outputs:
         - openblas  # [not x86_64]
         - libcuvs =24.08
         - cuda-version {{ cudatoolkit }}
-        {% if cuda_major == "11" %}
+        {% if cudatoolkit == '11.8.0' %}
         - cuda-profiler-api =11.8.86
         {% endif %}
       run:

From 948ee8cf7cdc89ca66c5a002d4244683390366ab Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Thu, 12 Dec 2024 13:14:47 -0800
Subject: [PATCH 14/51] cuda11 pin

---
 conda/faiss-gpu-cuvs/meta.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conda/faiss-gpu-cuvs/meta.yaml b/conda/faiss-gpu-cuvs/meta.yaml
index 18cfb56d2f..30e6157314 100644
--- a/conda/faiss-gpu-cuvs/meta.yaml
+++ b/conda/faiss-gpu-cuvs/meta.yaml
@@ -47,7 +47,7 @@ outputs:
       build:
         - {{ compiler('cxx') }}
         {% if cudatoolkit == '11.8.0' %}
-        - {{ compiler('cuda11') }} ={{ cudatoolkit }}
+        - {{ compiler('cuda11') }} ={{ 11.8 }}
         {% else %}
         - {{ compiler('cuda') }}
         {% endif %}

From 6f19e932a32aaf3682daedbc91bfe479fb4ea057 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Thu, 12 Dec 2024 13:23:29 -0800
Subject: [PATCH 15/51] rm cuda11 pin

---
 conda/faiss-gpu-cuvs/meta.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conda/faiss-gpu-cuvs/meta.yaml b/conda/faiss-gpu-cuvs/meta.yaml
index 30e6157314..acef609c6a 100644
--- a/conda/faiss-gpu-cuvs/meta.yaml
+++ b/conda/faiss-gpu-cuvs/meta.yaml
@@ -47,7 +47,7 @@ outputs:
       build:
         - {{ compiler('cxx') }}
         {% if cudatoolkit == '11.8.0' %}
-        - {{ compiler('cuda11') }} ={{ 11.8 }}
+        - {{ compiler('cuda11') }}
         {% else %}
         - {{ compiler('cuda') }}
         {% endif %}

From 12da8ead32014370bcdb0d96546a44674d297569 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Thu, 12 Dec 2024 13:58:36 -0800
Subject: [PATCH 16/51] nvcc, cuda-nvcc

---
 conda/faiss-gpu-cuvs/meta.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/conda/faiss-gpu-cuvs/meta.yaml b/conda/faiss-gpu-cuvs/meta.yaml
index acef609c6a..f257ae2052 100644
--- a/conda/faiss-gpu-cuvs/meta.yaml
+++ b/conda/faiss-gpu-cuvs/meta.yaml
@@ -47,9 +47,9 @@ outputs:
       build:
         - {{ compiler('cxx') }}
         {% if cudatoolkit == '11.8.0' %}
-        - {{ compiler('cuda11') }}
+        - {{ compiler('nvcc') }}
         {% else %}
-        - {{ compiler('cuda') }}
+        - {{ compiler('cuda-nvcc') }}
         {% endif %}
         - sysroot_linux-64  # [linux64]
         - llvm-openmp  # [osx]

From 202646785c5c84b69c87c1522bd76844376124c3 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Thu, 12 Dec 2024 14:31:51 -0800
Subject: [PATCH 17/51] cuda-toolkit conditionally

---
 conda/faiss-gpu-cuvs/meta.yaml | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/conda/faiss-gpu-cuvs/meta.yaml b/conda/faiss-gpu-cuvs/meta.yaml
index f257ae2052..4efd8dc2af 100644
--- a/conda/faiss-gpu-cuvs/meta.yaml
+++ b/conda/faiss-gpu-cuvs/meta.yaml
@@ -49,6 +49,7 @@ outputs:
         {% if cudatoolkit == '11.8.0' %}
         - {{ compiler('nvcc') }}
         {% else %}
+        - cuda-toolkit {{ cudatoolkit }}
         - {{ compiler('cuda-nvcc') }}
         {% endif %}
         - sysroot_linux-64  # [linux64]
@@ -58,7 +59,6 @@ outputs:
         - _openmp_mutex =4.5=2_kmp_llvm  # [x86_64]
         - mkl =2023  # [x86_64]
         - mkl-devel =2023  # [x86_64]
-        - cuda-toolkit {{ cudatoolkit }}
         - cuda-version {{ cudatoolkit }}
       host:
         - _openmp_mutex =4.5=2_kmp_llvm  # [x86_64]
@@ -100,13 +100,18 @@ outputs:
     requirements:
       build:
         - {{ compiler('cxx') }}
+        {% if cudatoolkit == '11.8.0' %}
+        - {{ compiler('nvcc') }}
+        {% else %}
+        - cuda-toolkit {{ cudatoolkit }}
+        - {{ compiler('cuda-nvcc') }}
+        {% endif %}
         - sysroot_linux-64 =2.17 # [linux64]
         - swig
         - cmake >=3.24.0
         - make  # [not win]
         - _openmp_mutex =4.5=2_kmp_llvm  # [x86_64]
         - mkl =2023  # [x86_64]
-        - cuda-toolkit {{ cudatoolkit }}
       host:
         - _openmp_mutex =4.5=2_kmp_llvm  # [x86_64]
         - python {{ python }}

From db09a56fc068f93111875f8a9e6570511ec66859 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Thu, 12 Dec 2024 15:03:31 -0800
Subject: [PATCH 18/51] nvcc install

---
 conda/faiss-gpu-cuvs/meta.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/conda/faiss-gpu-cuvs/meta.yaml b/conda/faiss-gpu-cuvs/meta.yaml
index 4efd8dc2af..bb37c028b3 100644
--- a/conda/faiss-gpu-cuvs/meta.yaml
+++ b/conda/faiss-gpu-cuvs/meta.yaml
@@ -101,6 +101,7 @@ outputs:
       build:
         - {{ compiler('cxx') }}
         {% if cudatoolkit == '11.8.0' %}
+        - nvcc_linux-64
         - {{ compiler('nvcc') }}
         {% else %}
         - cuda-toolkit {{ cudatoolkit }}

From 1a3a45c749692387a0b99199251dab3c0707378c Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Thu, 12 Dec 2024 15:33:08 -0800
Subject: [PATCH 19/51] install cudatoolkit-dev

---
 .github/actions/build_conda/action.yml | 2 ++
 conda/faiss-gpu-cuvs/meta.yaml         | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/actions/build_conda/action.yml b/.github/actions/build_conda/action.yml
index 9cbe0371a3..d9008d08d4 100644
--- a/.github/actions/build_conda/action.yml
+++ b/.github/actions/build_conda/action.yml
@@ -36,6 +36,8 @@ runs:
       run: |
         conda install -y -q "conda!=24.11.0"
         conda install -y -q "conda-build!=24.11.0"
+        which nvcc
+        echo $CUDA_HOME
     - name: Enable anaconda uploads
       if: inputs.label != ''
       shell: ${{ steps.choose_shell.outputs.shell }}
diff --git a/conda/faiss-gpu-cuvs/meta.yaml b/conda/faiss-gpu-cuvs/meta.yaml
index bb37c028b3..dafbb294d7 100644
--- a/conda/faiss-gpu-cuvs/meta.yaml
+++ b/conda/faiss-gpu-cuvs/meta.yaml
@@ -47,6 +47,7 @@ outputs:
       build:
         - {{ compiler('cxx') }}
         {% if cudatoolkit == '11.8.0' %}
+        - cudatoolkit-dev
         - {{ compiler('nvcc') }}
         {% else %}
         - cuda-toolkit {{ cudatoolkit }}
@@ -101,7 +102,7 @@ outputs:
       build:
         - {{ compiler('cxx') }}
         {% if cudatoolkit == '11.8.0' %}
-        - nvcc_linux-64
+        - cudatoolkit-dev
         - {{ compiler('nvcc') }}
         {% else %}
         - cuda-toolkit {{ cudatoolkit }}

From aef391d4a7870e3af5aaa680b23231e25b6cd96a Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Thu, 12 Dec 2024 15:35:45 -0800
Subject: [PATCH 20/51] empty commit


From b90af9143c73ca6a7e628080b86760ee8ea42060 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Fri, 13 Dec 2024 06:20:12 -0800
Subject: [PATCH 21/51] set CUDA_HOME

---
 .github/actions/build_conda/action.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/actions/build_conda/action.yml b/.github/actions/build_conda/action.yml
index d9008d08d4..58b50dedcb 100644
--- a/.github/actions/build_conda/action.yml
+++ b/.github/actions/build_conda/action.yml
@@ -36,8 +36,7 @@ runs:
       run: |
         conda install -y -q "conda!=24.11.0"
         conda install -y -q "conda-build!=24.11.0"
-        which nvcc
-        echo $CUDA_HOME
+        export CUDA_HOME="/usr/local/cuda-11.8"
     - name: Enable anaconda uploads
       if: inputs.label != ''
       shell: ${{ steps.choose_shell.outputs.shell }}

From 6f53a290f9da95c2d4fc4de41bcb8c977f7753a3 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Fri, 13 Dec 2024 07:06:26 -0800
Subject: [PATCH 22/51] env variables cuda_home

---
 .github/actions/build_conda/action.yml | 1 -
 conda/faiss-gpu-cuvs/meta.yaml         | 8 +++++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/.github/actions/build_conda/action.yml b/.github/actions/build_conda/action.yml
index 58b50dedcb..9cbe0371a3 100644
--- a/.github/actions/build_conda/action.yml
+++ b/.github/actions/build_conda/action.yml
@@ -36,7 +36,6 @@ runs:
       run: |
         conda install -y -q "conda!=24.11.0"
         conda install -y -q "conda-build!=24.11.0"
-        export CUDA_HOME="/usr/local/cuda-11.8"
     - name: Enable anaconda uploads
       if: inputs.label != ''
       shell: ${{ steps.choose_shell.outputs.shell }}
diff --git a/conda/faiss-gpu-cuvs/meta.yaml b/conda/faiss-gpu-cuvs/meta.yaml
index dafbb294d7..7517cd6f9f 100644
--- a/conda/faiss-gpu-cuvs/meta.yaml
+++ b/conda/faiss-gpu-cuvs/meta.yaml
@@ -43,12 +43,13 @@ outputs:
         - {{ pin_compatible('libfaiss', exact=True) }}
       script_env:
         - CUDA_ARCHS
+        - CUDA_HOME=$CONDA_PREFIX
     requirements:
       build:
         - {{ compiler('cxx') }}
         {% if cudatoolkit == '11.8.0' %}
         - cudatoolkit-dev
-        - {{ compiler('nvcc') }}
+        - {{ compiler('nvcc') }} =11.8.0
         {% else %}
         - cuda-toolkit {{ cudatoolkit }}
         - {{ compiler('cuda-nvcc') }}
@@ -98,12 +99,13 @@ outputs:
     script: build-pkg.bat  # [win]
     build:
       string: "py{{ PY_VER }}_h{{ PKG_HASH }}_{{ number }}_cuda{{ cudatoolkit }}{{ suffix }}"
+      script_env:
+        - CUDA_HOME=$CONDA_PREFIX
     requirements:
       build:
         - {{ compiler('cxx') }}
         {% if cudatoolkit == '11.8.0' %}
-        - cudatoolkit-dev
-        - {{ compiler('nvcc') }}
+        - {{ compiler('nvcc') }} =11.8.0
         {% else %}
         - cuda-toolkit {{ cudatoolkit }}
         - {{ compiler('cuda-nvcc') }}

From f47817e791f5b826ff3e6ca78707f520e06b956a Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Fri, 13 Dec 2024 07:20:52 -0800
Subject: [PATCH 23/51] rm cudatoolkit-dev

---
 conda/faiss-gpu-cuvs/meta.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/conda/faiss-gpu-cuvs/meta.yaml b/conda/faiss-gpu-cuvs/meta.yaml
index 7517cd6f9f..2aba9b32ee 100644
--- a/conda/faiss-gpu-cuvs/meta.yaml
+++ b/conda/faiss-gpu-cuvs/meta.yaml
@@ -48,7 +48,6 @@ outputs:
       build:
         - {{ compiler('cxx') }}
         {% if cudatoolkit == '11.8.0' %}
-        - cudatoolkit-dev
         - {{ compiler('nvcc') }} =11.8.0
         {% else %}
         - cuda-toolkit {{ cudatoolkit }}

From 241a247de135e4e45585813274409c2a5bda9678 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Fri, 13 Dec 2024 07:37:57 -0800
Subject: [PATCH 24/51] syntax error

---
 conda/faiss-gpu-cuvs/meta.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/conda/faiss-gpu-cuvs/meta.yaml b/conda/faiss-gpu-cuvs/meta.yaml
index 2aba9b32ee..6545702c56 100644
--- a/conda/faiss-gpu-cuvs/meta.yaml
+++ b/conda/faiss-gpu-cuvs/meta.yaml
@@ -43,7 +43,7 @@ outputs:
         - {{ pin_compatible('libfaiss', exact=True) }}
       script_env:
         - CUDA_ARCHS
-        - CUDA_HOME=$CONDA_PREFIX
+        - CUDA_HOME ={{ CONDA_PREFIX }}
     requirements:
       build:
         - {{ compiler('cxx') }}
@@ -99,7 +99,7 @@ outputs:
     build:
       string: "py{{ PY_VER }}_h{{ PKG_HASH }}_{{ number }}_cuda{{ cudatoolkit }}{{ suffix }}"
       script_env:
-        - CUDA_HOME=$CONDA_PREFIX
+        - CUDA_HOME ={{ CONDA_PREFIX }}
     requirements:
       build:
         - {{ compiler('cxx') }}

From bc08cda4d67d7e0916ae952b239d9a5f8cd791ac Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Fri, 13 Dec 2024 08:18:16 -0800
Subject: [PATCH 25/51] BUILD_PREFIX

---
 conda/faiss-gpu-cuvs/build-lib.sh | 2 +-
 conda/faiss-gpu-cuvs/build-pkg.sh | 1 +
 conda/faiss-gpu-cuvs/meta.yaml    | 3 ---
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/conda/faiss-gpu-cuvs/build-lib.sh b/conda/faiss-gpu-cuvs/build-lib.sh
index 37f0381809..d40ed1296e 100644
--- a/conda/faiss-gpu-cuvs/build-lib.sh
+++ b/conda/faiss-gpu-cuvs/build-lib.sh
@@ -6,7 +6,7 @@
 
 set -e
 
-
+export CUDA_HOME=$BUILD_PREFIX
 # Build libfaiss.so/libfaiss_avx2.so/libfaiss_avx512.so
 cmake -B _build \
       -DBUILD_SHARED_LIBS=ON \
diff --git a/conda/faiss-gpu-cuvs/build-pkg.sh b/conda/faiss-gpu-cuvs/build-pkg.sh
index 09d6e6b7af..4ed395d588 100644
--- a/conda/faiss-gpu-cuvs/build-pkg.sh
+++ b/conda/faiss-gpu-cuvs/build-pkg.sh
@@ -6,6 +6,7 @@
 
 set -e
 
+export CUDA_HOME=$BUILD_PREFIX
 
 # Build swigfaiss.so/swigfaiss_avx2.so/swigfaiss_avx512.so
 cmake -B _build_python_${PY_VER} \
diff --git a/conda/faiss-gpu-cuvs/meta.yaml b/conda/faiss-gpu-cuvs/meta.yaml
index 6545702c56..342757980b 100644
--- a/conda/faiss-gpu-cuvs/meta.yaml
+++ b/conda/faiss-gpu-cuvs/meta.yaml
@@ -43,7 +43,6 @@ outputs:
         - {{ pin_compatible('libfaiss', exact=True) }}
       script_env:
         - CUDA_ARCHS
-        - CUDA_HOME ={{ CONDA_PREFIX }}
     requirements:
       build:
         - {{ compiler('cxx') }}
@@ -98,8 +97,6 @@ outputs:
     script: build-pkg.bat  # [win]
     build:
       string: "py{{ PY_VER }}_h{{ PKG_HASH }}_{{ number }}_cuda{{ cudatoolkit }}{{ suffix }}"
-      script_env:
-        - CUDA_HOME ={{ CONDA_PREFIX }}
     requirements:
       build:
         - {{ compiler('cxx') }}

From 899da3c2c5dc5c2b5981d9fc91d1efe26739b7b1 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Fri, 13 Dec 2024 09:15:27 -0800
Subject: [PATCH 26/51] cuda-toolkit install

---
 conda/faiss-gpu-cuvs/meta.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/conda/faiss-gpu-cuvs/meta.yaml b/conda/faiss-gpu-cuvs/meta.yaml
index 342757980b..77856fb299 100644
--- a/conda/faiss-gpu-cuvs/meta.yaml
+++ b/conda/faiss-gpu-cuvs/meta.yaml
@@ -49,7 +49,6 @@ outputs:
         {% if cudatoolkit == '11.8.0' %}
         - {{ compiler('nvcc') }} =11.8.0
         {% else %}
-        - cuda-toolkit {{ cudatoolkit }}
         - {{ compiler('cuda-nvcc') }}
         {% endif %}
         - sysroot_linux-64  # [linux64]
@@ -60,6 +59,7 @@ outputs:
         - mkl =2023  # [x86_64]
         - mkl-devel =2023  # [x86_64]
         - cuda-version {{ cudatoolkit }}
+        - cuda-toolkit {{ cudatoolkit }}
       host:
         - _openmp_mutex =4.5=2_kmp_llvm  # [x86_64]
         - mkl =2023  # [x86_64]
@@ -103,7 +103,6 @@ outputs:
         {% if cudatoolkit == '11.8.0' %}
         - {{ compiler('nvcc') }} =11.8.0
         {% else %}
-        - cuda-toolkit {{ cudatoolkit }}
         - {{ compiler('cuda-nvcc') }}
         {% endif %}
         - sysroot_linux-64 =2.17 # [linux64]
@@ -112,6 +111,7 @@ outputs:
         - make  # [not win]
         - _openmp_mutex =4.5=2_kmp_llvm  # [x86_64]
         - mkl =2023  # [x86_64]
+        - cuda-toolkit {{ cudatoolkit }}
       host:
         - _openmp_mutex =4.5=2_kmp_llvm  # [x86_64]
         - python {{ python }}

From 52b64414ee2aa3ad1692979a074cabcb72af090a Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Fri, 13 Dec 2024 12:17:22 -0800
Subject: [PATCH 27/51] reset

---
 conda/faiss-gpu-cuvs/build-lib.sh |  2 +-
 conda/faiss-gpu-cuvs/build-pkg.sh |  2 +-
 conda/faiss-gpu-cuvs/meta.yaml    | 15 +--------------
 3 files changed, 3 insertions(+), 16 deletions(-)

diff --git a/conda/faiss-gpu-cuvs/build-lib.sh b/conda/faiss-gpu-cuvs/build-lib.sh
index d40ed1296e..9957c84413 100644
--- a/conda/faiss-gpu-cuvs/build-lib.sh
+++ b/conda/faiss-gpu-cuvs/build-lib.sh
@@ -6,7 +6,7 @@
 
 set -e
 
-export CUDA_HOME=$BUILD_PREFIX
+# export CUDA_HOME=$BUILD_PREFIX
 # Build libfaiss.so/libfaiss_avx2.so/libfaiss_avx512.so
 cmake -B _build \
       -DBUILD_SHARED_LIBS=ON \
diff --git a/conda/faiss-gpu-cuvs/build-pkg.sh b/conda/faiss-gpu-cuvs/build-pkg.sh
index 4ed395d588..ad70dc2a21 100644
--- a/conda/faiss-gpu-cuvs/build-pkg.sh
+++ b/conda/faiss-gpu-cuvs/build-pkg.sh
@@ -6,7 +6,7 @@
 
 set -e
 
-export CUDA_HOME=$BUILD_PREFIX
+# export CUDA_HOME=$BUILD_PREFIX
 
 # Build swigfaiss.so/swigfaiss_avx2.so/swigfaiss_avx512.so
 cmake -B _build_python_${PY_VER} \
diff --git a/conda/faiss-gpu-cuvs/meta.yaml b/conda/faiss-gpu-cuvs/meta.yaml
index 77856fb299..49d9d888e6 100644
--- a/conda/faiss-gpu-cuvs/meta.yaml
+++ b/conda/faiss-gpu-cuvs/meta.yaml
@@ -46,11 +46,6 @@ outputs:
     requirements:
       build:
         - {{ compiler('cxx') }}
-        {% if cudatoolkit == '11.8.0' %}
-        - {{ compiler('nvcc') }} =11.8.0
-        {% else %}
-        - {{ compiler('cuda-nvcc') }}
-        {% endif %}
         - sysroot_linux-64  # [linux64]
         - llvm-openmp  # [osx]
         - cmake >=3.26.4
@@ -60,6 +55,7 @@ outputs:
         - mkl-devel =2023  # [x86_64]
         - cuda-version {{ cudatoolkit }}
         - cuda-toolkit {{ cudatoolkit }}
+        - cuda-profiler-api =11.8.86
       host:
         - _openmp_mutex =4.5=2_kmp_llvm  # [x86_64]
         - mkl =2023  # [x86_64]
@@ -73,11 +69,7 @@ outputs:
         - _openmp_mutex =4.5=2_kmp_llvm  # [x86_64]
         - mkl =2023  # [x86_64]
         - openblas  # [not x86_64]
-        {% if cudatoolkit == '11.8.0' %}
-        - cudatoolkit
-        {% else %}
         - cuda-cudart
-        {% endif %}
         - libcublas {{ libcublas_constraints }}
         - libcuvs =24.08
         - cuda-version {{ cuda_constraints }}
@@ -100,11 +92,6 @@ outputs:
     requirements:
       build:
         - {{ compiler('cxx') }}
-        {% if cudatoolkit == '11.8.0' %}
-        - {{ compiler('nvcc') }} =11.8.0
-        {% else %}
-        - {{ compiler('cuda-nvcc') }}
-        {% endif %}
         - sysroot_linux-64 =2.17 # [linux64]
         - swig
         - cmake >=3.24.0

From 50d70cfa41499fecb6511e0b6134d973b989c36d Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Sat, 14 Dec 2024 08:33:43 -0800
Subject: [PATCH 28/51] pin gcc version

---
 conda/faiss-gpu-cuvs/meta.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/conda/faiss-gpu-cuvs/meta.yaml b/conda/faiss-gpu-cuvs/meta.yaml
index 49d9d888e6..822c0ad3ee 100644
--- a/conda/faiss-gpu-cuvs/meta.yaml
+++ b/conda/faiss-gpu-cuvs/meta.yaml
@@ -45,7 +45,7 @@ outputs:
         - CUDA_ARCHS
     requirements:
       build:
-        - {{ compiler('cxx') }}
+        - {{ compiler('cxx') }} =12
         - sysroot_linux-64  # [linux64]
         - llvm-openmp  # [osx]
         - cmake >=3.26.4
@@ -91,7 +91,7 @@ outputs:
       string: "py{{ PY_VER }}_h{{ PKG_HASH }}_{{ number }}_cuda{{ cudatoolkit }}{{ suffix }}"
     requirements:
       build:
-        - {{ compiler('cxx') }}
+        - {{ compiler('cxx') }} =12
         - sysroot_linux-64 =2.17 # [linux64]
         - swig
         - cmake >=3.24.0

From bf02d202503458a3486f1d56655bc811c67d3cbf Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Sun, 15 Dec 2024 08:09:28 -0800
Subject: [PATCH 29/51] rm everything but cuda-toolkit

---
 conda/faiss-gpu-cuvs/meta.yaml | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/conda/faiss-gpu-cuvs/meta.yaml b/conda/faiss-gpu-cuvs/meta.yaml
index 822c0ad3ee..d8c681b2d7 100644
--- a/conda/faiss-gpu-cuvs/meta.yaml
+++ b/conda/faiss-gpu-cuvs/meta.yaml
@@ -53,26 +53,18 @@ outputs:
         - _openmp_mutex =4.5=2_kmp_llvm  # [x86_64]
         - mkl =2023  # [x86_64]
         - mkl-devel =2023  # [x86_64]
-        - cuda-version {{ cudatoolkit }}
         - cuda-toolkit {{ cudatoolkit }}
-        - cuda-profiler-api =11.8.86
       host:
         - _openmp_mutex =4.5=2_kmp_llvm  # [x86_64]
         - mkl =2023  # [x86_64]
         - openblas  # [not x86_64]
         - libcuvs =24.08
-        - cuda-version {{ cudatoolkit }}
-        {% if cudatoolkit == '11.8.0' %}
-        - cuda-profiler-api =11.8.86
-        {% endif %}
       run:
         - _openmp_mutex =4.5=2_kmp_llvm  # [x86_64]
         - mkl =2023  # [x86_64]
         - openblas  # [not x86_64]
-        - cuda-cudart
         - libcublas {{ libcublas_constraints }}
         - libcuvs =24.08
-        - cuda-version {{ cuda_constraints }}
     test:
       requires:
         - conda-build

From 8d8e9465ecce5156235ef7eec6c39563f8b9e927 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Mon, 16 Dec 2024 08:41:01 -0800
Subject: [PATCH 30/51] rm compilation issues

---
 faiss/gpu/impl/CuvsIVFFlat.cu | 4 +---
 faiss/gpu/impl/CuvsIVFPQ.cu   | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/faiss/gpu/impl/CuvsIVFFlat.cu b/faiss/gpu/impl/CuvsIVFFlat.cu
index bf67eb938b..ef6e1a3d85 100644
--- a/faiss/gpu/impl/CuvsIVFFlat.cu
+++ b/faiss/gpu/impl/CuvsIVFFlat.cu
@@ -135,16 +135,14 @@ void CuvsIVFFlat::search(
 
     validRowIndices(resources_, queries, nan_flag.data_handle());
 
-    faiss::idx_t max_ind = std::numeric_limits<faiss::idx_t>::max();
     raft::linalg::map_offset(
             raft_handle,
             raft::make_device_vector_view(outIndices.data(), numQueries * k_),
             [nan_flag = nan_flag.data_handle(),
              out_inds = outIndices.data(),
-             max_ind,
              k_] __device__(uint32_t i) {
                 uint32_t row = i / k_;
-                if (!nan_flag[row] || out_inds[i] == max_ind)
+                if (!nan_flag[row])
                     return idx_t(-1);
                 return out_inds[i];
             });
diff --git a/faiss/gpu/impl/CuvsIVFPQ.cu b/faiss/gpu/impl/CuvsIVFPQ.cu
index 9de199bbb6..7172dad9b4 100644
--- a/faiss/gpu/impl/CuvsIVFPQ.cu
+++ b/faiss/gpu/impl/CuvsIVFPQ.cu
@@ -332,16 +332,14 @@ void CuvsIVFPQ::search(
 
     validRowIndices(resources_, queries, nan_flag.data_handle());
 
-    auto max_ind = std::numeric_limits<faiss::idx_t>();
     raft::linalg::map_offset(
             raft_handle,
             raft::make_device_vector_view(outIndices.data(), numQueries * k_),
             [nan_flag = nan_flag.data_handle(),
              out_inds = outIndices.data(),
-             max_ind,
              k_] __device__(uint32_t i) {
                 uint32_t row = i / k_;
-                if (!nan_flag[row] || out_inds[i] == max_ind)
+                if (!nan_flag[row])
                     return idx_t(-1);
                 return out_inds[i];
             });

From bd7d4962cfb44a5b234ca9ae756b6a638c423a66 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Mon, 16 Dec 2024 10:52:36 -0800
Subject: [PATCH 31/51] cudart

---
 conda/faiss-gpu-cuvs/meta.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/conda/faiss-gpu-cuvs/meta.yaml b/conda/faiss-gpu-cuvs/meta.yaml
index d8c681b2d7..de3f8eae2b 100644
--- a/conda/faiss-gpu-cuvs/meta.yaml
+++ b/conda/faiss-gpu-cuvs/meta.yaml
@@ -59,12 +59,15 @@ outputs:
         - mkl =2023  # [x86_64]
         - openblas  # [not x86_64]
         - libcuvs =24.08
+        - cuda-version {{ cudatoolkit }}
       run:
         - _openmp_mutex =4.5=2_kmp_llvm  # [x86_64]
         - mkl =2023  # [x86_64]
         - openblas  # [not x86_64]
+        - cuda-cudart {{ cuda_constraints }}
         - libcublas {{ libcublas_constraints }}
         - libcuvs =24.08
+        - cuda-version {{ cudatoolkit }}
     test:
       requires:
         - conda-build

From 3597fd0de2c4979df86ce0f5d841405e1ae9f6a3 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Tue, 17 Dec 2024 11:12:28 -0800
Subject: [PATCH 32/51] rm conda CI

---
 .github/workflows/build-pull-request.yml | 30 ------------------------
 conda/faiss-gpu-cuvs/build-lib.sh        |  2 +-
 conda/faiss-gpu-cuvs/build-pkg.sh        |  1 -
 3 files changed, 1 insertion(+), 32 deletions(-)

diff --git a/.github/workflows/build-pull-request.yml b/.github/workflows/build-pull-request.yml
index f94077513d..d94abba081 100644
--- a/.github/workflows/build-pull-request.yml
+++ b/.github/workflows/build-pull-request.yml
@@ -132,36 +132,6 @@ jobs:
           fetch-tags: true
       - name: Build and Package (conda)
         uses: ./.github/actions/build_conda
-  linux-x86_64-GPU-CUVS-CUDA11-8-0-conda:
-    name: Linux x86_64 GPU w/ cuVS conda (CUDA 11.8.0)
-    runs-on: 4-core-ubuntu-gpu-t4
-    env:
-      CUDA_ARCHS: "70-real;72-real;75-real;80;86-real"
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-          fetch-tags: true
-      - uses: ./.github/actions/build_conda
-        with:
-          cuvs: "ON"
-          cuda: "11.8.0"
-  linux-x86_64-GPU-CUVS-CUDA12-4-0-conda:
-    name: Linux x86_64 GPU w/ cuVS conda (CUDA 12.4.0)
-    runs-on: 4-core-ubuntu-gpu-t4
-    env:
-      CUDA_ARCHS: "70-real;72-real;75-real;80;86-real"
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-          fetch-tags: true
-      - uses: ./.github/actions/build_conda
-        with:
-          cuvs: "ON"
-          cuda: "12.4.0"
   windows-x86_64-conda:
     name: Windows x86_64 (conda)
     needs: linux-x86_64-cmake
diff --git a/conda/faiss-gpu-cuvs/build-lib.sh b/conda/faiss-gpu-cuvs/build-lib.sh
index 9957c84413..37f0381809 100644
--- a/conda/faiss-gpu-cuvs/build-lib.sh
+++ b/conda/faiss-gpu-cuvs/build-lib.sh
@@ -6,7 +6,7 @@
 
 set -e
 
-# export CUDA_HOME=$BUILD_PREFIX
+
 # Build libfaiss.so/libfaiss_avx2.so/libfaiss_avx512.so
 cmake -B _build \
       -DBUILD_SHARED_LIBS=ON \
diff --git a/conda/faiss-gpu-cuvs/build-pkg.sh b/conda/faiss-gpu-cuvs/build-pkg.sh
index ad70dc2a21..09d6e6b7af 100644
--- a/conda/faiss-gpu-cuvs/build-pkg.sh
+++ b/conda/faiss-gpu-cuvs/build-pkg.sh
@@ -6,7 +6,6 @@
 
 set -e
 
-# export CUDA_HOME=$BUILD_PREFIX
 
 # Build swigfaiss.so/swigfaiss_avx2.so/swigfaiss_avx512.so
 cmake -B _build_python_${PY_VER} \

From 653042f4136bbfe5ed83c40b3ea6736799f6d5c8 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Tue, 17 Dec 2024 11:15:10 -0800
Subject: [PATCH 33/51] cuvs url

---
 INSTALL.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/INSTALL.md b/INSTALL.md
index 0eb066afe5..d66e543916 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -6,7 +6,7 @@ pre-release nightly builds.
 
 - The CPU-only faiss-cpu conda package is currently available on Linux (x86-64 and aarch64), OSX (arm64 only), and Windows (x86-64)
 - faiss-gpu, containing both CPU and GPU indices, is available on Linux (x86-64 only) for CUDA 11.4 and 12.1
-- faiss-gpu-cuvs containing GPU indices provided by NVIDIA cuVS, is available on Linux (x86-64 only) for CUDA 11.8 and 12.4.
+- faiss-gpu-cuvs containing GPU indices provided by [NVIDIA cuVS](https://github.com/rapidsai/cuvs/), is available on Linux (x86-64 only) for CUDA 11.8 and 12.4.
 
 To install the latest stable release:
 

From b93a189dd3d24d7f16edc3eaf22f21a4892a62b7 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Tue, 17 Dec 2024 19:39:09 -0800
Subject: [PATCH 34/51] relax cuda_constraints, conditionally use nvidia
 channel

---
 .github/actions/build_conda/action.yml   | 20 ++++++++++++----
 .github/workflows/build-pull-request.yml | 30 ++++++++++++++++++++++++
 conda/faiss-gpu-cuvs/meta.yaml           | 10 ++++----
 3 files changed, 51 insertions(+), 9 deletions(-)

diff --git a/.github/actions/build_conda/action.yml b/.github/actions/build_conda/action.yml
index 9cbe0371a3..f4db126b34 100644
--- a/.github/actions/build_conda/action.yml
+++ b/.github/actions/build_conda/action.yml
@@ -79,8 +79,14 @@ runs:
       shell: ${{ steps.choose_shell.outputs.shell }}
       working-directory: conda
       run: |
-        conda build faiss-gpu-cuvs --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \
-            -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge -c nvidia
+        # avoid using nvidia channel for CUDA 12.4
+        if [ "${{ inputs.cuda }}" != "11.8.0" ]; then
+          conda build faiss-gpu-cuvs --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \
+              -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge
+        else
+          conda build faiss-gpu-cuvs --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \
+              -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge -c nvidia
+        fi
     - name: Conda build (GPU w/ cuVS) w/ anaconda upload
       if: inputs.label != '' && inputs.cuda != '' && inputs.cuvs != ''
       shell: ${{ steps.choose_shell.outputs.shell }}
@@ -88,5 +94,11 @@ runs:
       env:
         PACKAGE_TYPE: ${{ inputs.label }}
       run: |
-        conda build faiss-gpu-cuvs --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \
-            --user pytorch --label ${{ inputs.label }} -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge -c nvidia
+        # avoid using nvidia channel for CUDA 12.4; it is only added for CUDA 11.8.0
+        if [ "${{ inputs.cuda }}" != "11.8.0" ]; then
+          conda build faiss-gpu-cuvs --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \
+              --user pytorch --label ${{ inputs.label }} -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge
+        else
+          conda build faiss-gpu-cuvs --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \
+              --user pytorch --label ${{ inputs.label }} -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge -c nvidia
+        fi
diff --git a/.github/workflows/build-pull-request.yml b/.github/workflows/build-pull-request.yml
index d94abba081..f94077513d 100644
--- a/.github/workflows/build-pull-request.yml
+++ b/.github/workflows/build-pull-request.yml
@@ -132,6 +132,36 @@ jobs:
           fetch-tags: true
       - name: Build and Package (conda)
         uses: ./.github/actions/build_conda
+  linux-x86_64-GPU-CUVS-CUDA11-8-0-conda:
+    name: Linux x86_64 GPU w/ cuVS conda (CUDA 11.8.0)
+    runs-on: 4-core-ubuntu-gpu-t4
+    env:
+      CUDA_ARCHS: "70-real;72-real;75-real;80;86-real"
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          fetch-tags: true
+      - uses: ./.github/actions/build_conda
+        with:
+          cuvs: "ON"
+          cuda: "11.8.0"
+  linux-x86_64-GPU-CUVS-CUDA12-4-0-conda:
+    name: Linux x86_64 GPU w/ cuVS conda (CUDA 12.4.0)
+    runs-on: 4-core-ubuntu-gpu-t4
+    env:
+      CUDA_ARCHS: "70-real;72-real;75-real;80;86-real"
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          fetch-tags: true
+      - uses: ./.github/actions/build_conda
+        with:
+          cuvs: "ON"
+          cuda: "12.4.0"
   windows-x86_64-conda:
     name: Windows x86_64 (conda)
     needs: linux-x86_64-cmake
diff --git a/conda/faiss-gpu-cuvs/meta.yaml b/conda/faiss-gpu-cuvs/meta.yaml
index ecc87daa64..3b434d8a75 100644
--- a/conda/faiss-gpu-cuvs/meta.yaml
+++ b/conda/faiss-gpu-cuvs/meta.yaml
@@ -7,10 +7,10 @@
 {% set suffix = "_nightly" if environ.get('PACKAGE_TYPE') == 'nightly' else "" %}
 {% set number = GIT_DESCRIBE_NUMBER %}
 {% if cudatoolkit == '11.8.0' %}
-{% set cuda_constraints=">=11.8,<12" %}
-{% set libcublas_constraints=">=11.11,<12" %}
+{% set cuda_constraints=">=11.4,<=11.8" %}
+{% set libcublas_constraints=">=11.6,<12" %}
 {% elif cudatoolkit == '12.4.0' %}
-{% set cuda_constraints=">=12.1,<13" %}
+{% set cuda_constraints=">=12.1,<=12.5" %}
 {% set libcublas_constraints=">=12.1,<13" %}
 {% endif %}
 
@@ -59,7 +59,7 @@ outputs:
         - mkl =2023  # [x86_64]
         - openblas =0.3 # [not x86_64]
         - libcuvs =24.08
-        - cuda-version {{ cudatoolkit }}
+        - cuda-version {{ cuda_constraints }}
       run:
         - _openmp_mutex =4.5=2_kmp_llvm  # [x86_64]
         - mkl =2023  # [x86_64]
@@ -67,7 +67,7 @@ outputs:
         - cuda-cudart {{ cuda_constraints }}
         - libcublas {{ libcublas_constraints }}
         - libcuvs =24.08
-        - cuda-version {{ cudatoolkit }}
+        - cuda-version {{ cuda_constraints }}
     test:
       requires:
         - conda-build

From 6f5ef33bd5013abea4ca57551d5df48ebeb06d12 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Tue, 17 Dec 2024 19:39:37 -0800
Subject: [PATCH 35/51] INSTALL.md

---
 INSTALL.md | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/INSTALL.md b/INSTALL.md
index d66e543916..715ae74d70 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -26,7 +26,7 @@ $ conda install -c pytorch -c nvidia -c rapidsai -c conda-forge faiss-gpu-raft=1
 For faiss-gpu, the nvidia channel is required for CUDA, which is not
 published in the main anaconda channel.
 
-For faiss-gpu-cuvs, the rapidsai and conda-forge channels are required.
+For faiss-gpu-cuvs, the rapidsai, conda-forge and nvidia channels are required.
 
 Nightly pre-release packages can be installed as follows:
 
@@ -38,7 +38,7 @@ $ conda install -c pytorch/label/nightly faiss-cpu
 $ conda install -c pytorch/label/nightly -c nvidia faiss-gpu=1.9.0
 
 # GPU(+CPU) version with NVIDIA cuVS
-conda install -c pytorch -c rapidsai -c conda-forge faiss-gpu-cuvs pytorch pytorch-cuda numpy
+conda install -c pytorch -c rapidsai -c conda-forge -c nvidia faiss-gpu-cuvs pytorch pytorch-cuda numpy
 
 # GPU(+CPU) version using AMD ROCm not yet available
 ```
@@ -95,11 +95,23 @@ The optional requirements are:
   - the CUDA toolkit,
 - for AMD GPUs:
   - AMD ROCm,
+- for using NVIDIA cuVS implementations:
+  - libcuvs=24.08
 - for the python bindings:
   - python 3,
   - numpy,
   - and swig.
 
+To install the libcuvs optional dependency:
+1. With CUDA 12.4
+```
+conda install -c rapidsai -c conda-forge libcuvs=24.08 cuda-version=12.4
+```
+2. With CUDA 11.8
+```
+conda install -c rapidsai -c conda-forge -c nvidia libcuvs=24.08 cuda-version=11.8 
+```
+
 Indications for specific configurations are available in the [troubleshooting
 section of the wiki](https://github.com/facebookresearch/faiss/wiki/Troubleshooting).
 
@@ -118,10 +130,10 @@ Several options can be passed to CMake, among which:
   values are `ON` and `OFF`),
   - `-DFAISS_ENABLE_PYTHON=OFF` in order to disable building python bindings
   (possible values are `ON` and `OFF`),
+  `-DFAISS_ENABLE_GPU` must be `ON` when using this option. (possible values are `ON` and `OFF`),
   - `-DFAISS_ENABLE_CUVS=ON` in order to enable building the cuVS implementations
     of the IVF-Flat, IVF-PQ and CAGRA GPU-accelerated indices (default is `ON`, possible
-    values are `ON` and `OFF`)
-  `-DFAISS_ENABLE_GPU` must be `ON` when using this option. (possible values are `ON` and `OFF`),
+    values are `ON` and `OFF`). Ensure that `-DFAISS_ENABLE_GPU` is set to `ON` when enabling this option
   - `-DBUILD_TESTING=OFF` in order to disable building C++ tests,
   - `-DBUILD_SHARED_LIBS=ON` in order to build a shared library (possible values
   are `ON` and `OFF`),

From 7641a32b002dc2bc33acb9c53ec2e7d86721b82e Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Tue, 17 Dec 2024 19:54:05 -0800
Subject: [PATCH 36/51] cudart cannot be newer than toolkit

---
 conda/faiss-gpu-cuvs/meta.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conda/faiss-gpu-cuvs/meta.yaml b/conda/faiss-gpu-cuvs/meta.yaml
index 3b434d8a75..044ada9a55 100644
--- a/conda/faiss-gpu-cuvs/meta.yaml
+++ b/conda/faiss-gpu-cuvs/meta.yaml
@@ -10,7 +10,7 @@
 {% set cuda_constraints=">=11.4,<=11.8" %}
 {% set libcublas_constraints=">=11.6,<12" %}
 {% elif cudatoolkit == '12.4.0' %}
-{% set cuda_constraints=">=12.1,<=12.5" %}
+{% set cuda_constraints=">=12.1,<=12.4" %}
 {% set libcublas_constraints=">=12.1,<13" %}
 {% endif %}
 

From a16f20dd658777851175c62e98cbac652afbc5e5 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Wed, 18 Dec 2024 10:22:55 -0800
Subject: [PATCH 37/51] strict inequality in cuda_constraints

---
 conda/faiss-gpu-cuvs/meta.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/conda/faiss-gpu-cuvs/meta.yaml b/conda/faiss-gpu-cuvs/meta.yaml
index 044ada9a55..eacaf33b9d 100644
--- a/conda/faiss-gpu-cuvs/meta.yaml
+++ b/conda/faiss-gpu-cuvs/meta.yaml
@@ -7,10 +7,10 @@
 {% set suffix = "_nightly" if environ.get('PACKAGE_TYPE') == 'nightly' else "" %}
 {% set number = GIT_DESCRIBE_NUMBER %}
 {% if cudatoolkit == '11.8.0' %}
-{% set cuda_constraints=">=11.4,<=11.8" %}
+{% set cuda_constraints=">=11.4,<12" %}
 {% set libcublas_constraints=">=11.6,<12" %}
 {% elif cudatoolkit == '12.4.0' %}
-{% set cuda_constraints=">=12.1,<=12.4" %}
+{% set cuda_constraints=">=12.1,<12.5" %}
 {% set libcublas_constraints=">=12.1,<13" %}
 {% endif %}
 

From e9eb4b2a813e9ff71f77de32f043b23b7a7ed534 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Fri, 20 Dec 2024 11:39:23 -0800
Subject: [PATCH 38/51] cuvs installation

---
 INSTALL.md | 32 ++++++++++++++++--------------
 build.sh   | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+), 15 deletions(-)
 create mode 100755 build.sh

diff --git a/INSTALL.md b/INSTALL.md
index 715ae74d70..b789fbb328 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -6,7 +6,7 @@ pre-release nightly builds.
 
 - The CPU-only faiss-cpu conda package is currently available on Linux (x86-64 and aarch64), OSX (arm64 only), and Windows (x86-64)
 - faiss-gpu, containing both CPU and GPU indices, is available on Linux (x86-64 only) for CUDA 11.4 and 12.1
-- faiss-gpu-cuvs containing GPU indices provided by [NVIDIA cuVS](https://github.com/rapidsai/cuvs/), is available on Linux (x86-64 only) for CUDA 11.8 and 12.4.
+- The faiss-gpu-cuvs nightly package, containing GPU indices provided by [NVIDIA cuVS](https://github.com/rapidsai/cuvs/) 24.12, is available on Linux (x86-64 only) for CUDA 11.8 and 12.4.
 
 To install the latest stable release:
 
@@ -68,7 +68,7 @@ $ conda install -c conda-forge faiss-cpu
 # GPU version
 $ conda install -c conda-forge faiss-gpu
 
-# AMD ROCm version not yet available
+# NVIDIA cuVS and AMD ROCm versions not yet available
 ```
 
 You can tell which channel your conda packages come from by using `conda list`.
@@ -96,24 +96,27 @@ The optional requirements are:
 - for AMD GPUs:
   - AMD ROCm,
 - for using NVIDIA cuVS implementations:
-  - libcuvs=24.08
+  - libcuvs=24.12
 - for the python bindings:
   - python 3,
   - numpy,
   - and swig.
 
-To install the libcuvs optional dependency:
-1. With CUDA 12.4
+Indications for specific configurations are available in the [troubleshooting
+section of the wiki](https://github.com/facebookresearch/faiss/wiki/Troubleshooting).
+
+### Building with NVIDIA cuVS
+
+The libcuvs dependency should be installed via conda:
+1. With CUDA 12.0 - 12.5:
 ```
-conda install -c rapidsai -c conda-forge libcuvs=24.08 cuda-version=12.4
+conda install -c rapidsai -c conda-forge -c nvidia libcuvs=24.12 'cuda-version>=12.0,<=12.5'
 ```
-2. With CUDA 11.8
+2. With CUDA 11.4 - 11.8:
 ```
-conda install -c rapidsai -c conda-forge -c nvidia libcuvs=24.08 cuda-version=11.8 
+conda install -c rapidsai -c conda-forge -c nvidia libcuvs=24.12 'cuda-version>=11.4,<=11.8'
 ```
-
-Indications for specific configurations are available in the [troubleshooting
-section of the wiki](https://github.com/facebookresearch/faiss/wiki/Troubleshooting).
+For more ways to install cuVS 24.12, refer to the [RAPIDS Installation Guide](https://docs.rapids.ai/install).
 
 ## Step 1: invoking CMake
 
@@ -130,10 +133,9 @@ Several options can be passed to CMake, among which:
   values are `ON` and `OFF`),
   - `-DFAISS_ENABLE_PYTHON=OFF` in order to disable building python bindings
   (possible values are `ON` and `OFF`),
-  `-DFAISS_ENABLE_GPU` must be `ON` when using this option. (possible values are `ON` and `OFF`),
-  - `-DFAISS_ENABLE_CUVS=ON` in order to enable building the cuVS implementations
-    of the IVF-Flat, IVF-PQ and CAGRA GPU-accelerated indices (default is `ON`, possible
-    values are `ON` and `OFF`). Ensure that `-DFAISS_ENABLE_GPU` is set to `ON` when enabling this option
+  - `-DFAISS_ENABLE_CUVS=ON` in order to use the NVIDIA cuVS implementations
+    of the IVF-Flat, IVF-PQ and [CAGRA](https://arxiv.org/pdf/2308.15136) GPU-accelerated indices (default is `ON`, possible values are `ON` and `OFF`).
+    Note: `-DFAISS_ENABLE_GPU` must be set to `ON` when enabling this option.
   - `-DBUILD_TESTING=OFF` in order to disable building C++ tests,
   - `-DBUILD_SHARED_LIBS=ON` in order to build a shared library (possible values
   are `ON` and `OFF`),
diff --git a/build.sh b/build.sh
new file mode 100755
index 0000000000..911383164c
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+# NOTE: This file is temporary for the proof-of-concept branch and will be removed before this PR is merged
+
+BUILD_TYPE=Release
+BUILD_DIR=build/
+
+RAFT_REPO_REL=""
+EXTRA_CMAKE_ARGS=""
+set -e
+
+if [[ ${RAFT_REPO_REL} != "" ]]; then
+  RAFT_REPO_PATH="`readlink -f \"${RAFT_REPO_REL}\"`"
+  EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DCPM_raft_SOURCE=${RAFT_REPO_PATH}"
+fi
+
+if [ "$1" == "clean" ]; then
+  rm -rf build
+  rm -rf .cache
+  exit 0
+fi
+
+if [ "$1" == "test" ]; then
+  make -C build -j test
+  exit 0
+fi
+
+if [ "$1" == "test-raft" ]; then
+  ./build/faiss/gpu/test/TestRaftIndexIVFFlat
+  exit 0
+fi
+
+mkdir -p $BUILD_DIR
+cd $BUILD_DIR
+
+cmake \
+ -DFAISS_ENABLE_GPU=ON \
+ -DFAISS_ENABLE_CUVS=ON \
+ -DFAISS_ENABLE_PYTHON=ON \
+ -DBUILD_TESTING=ON \
+ -DBUILD_SHARED_LIBS=ON \
+ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
+ -DFAISS_OPT_LEVEL=avx2 \
+ -DRAFT_NVTX=OFF \
+ -DCMAKE_CUDA_ARCHITECTURES="NATIVE" \
+ -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
+ ${EXTRA_CMAKE_ARGS} \
+ ../
+
+#   -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
+#  -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+#  -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+
+# make -C build -j12 faiss
+cmake  --build . -j12
+# make -C build -j12 swigfaiss
+# (cd build/faiss/python && python setup.py install)
+

From 29ceb606a2ae7e5bd942dbcb1ba8314a9165ff7b Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Fri, 20 Dec 2024 11:40:42 -0800
Subject: [PATCH 39/51] rm build.sh

---
 build.sh | 58 --------------------------------------------------------
 1 file changed, 58 deletions(-)
 delete mode 100755 build.sh

diff --git a/build.sh b/build.sh
deleted file mode 100755
index 911383164c..0000000000
--- a/build.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/bin/bash
-
-# NOTE: This file is temporary for the proof-of-concept branch and will be removed before this PR is merged
-
-BUILD_TYPE=Release
-BUILD_DIR=build/
-
-RAFT_REPO_REL=""
-EXTRA_CMAKE_ARGS=""
-set -e
-
-if [[ ${RAFT_REPO_REL} != "" ]]; then
-  RAFT_REPO_PATH="`readlink -f \"${RAFT_REPO_REL}\"`"
-  EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DCPM_raft_SOURCE=${RAFT_REPO_PATH}"
-fi
-
-if [ "$1" == "clean" ]; then
-  rm -rf build
-  rm -rf .cache
-  exit 0
-fi
-
-if [ "$1" == "test" ]; then
-  make -C build -j test
-  exit 0
-fi
-
-if [ "$1" == "test-raft" ]; then
-  ./build/faiss/gpu/test/TestRaftIndexIVFFlat
-  exit 0
-fi
-
-mkdir -p $BUILD_DIR
-cd $BUILD_DIR
-
-cmake \
- -DFAISS_ENABLE_GPU=ON \
- -DFAISS_ENABLE_CUVS=ON \
- -DFAISS_ENABLE_PYTHON=ON \
- -DBUILD_TESTING=ON \
- -DBUILD_SHARED_LIBS=ON \
- -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
- -DFAISS_OPT_LEVEL=avx2 \
- -DRAFT_NVTX=OFF \
- -DCMAKE_CUDA_ARCHITECTURES="NATIVE" \
- -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
- ${EXTRA_CMAKE_ARGS} \
- ../
-
-#   -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
-#  -DCMAKE_C_COMPILER_LAUNCHER=ccache \
-#  -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-
-# make -C build -j12 faiss
-cmake  --build . -j12
-# make -C build -j12 swigfaiss
-# (cd build/faiss/python && python setup.py install)
-

From cd06f39ed60fa6eafd12dbca77a7f7527876a48a Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Fri, 20 Dec 2024 11:41:42 -0800
Subject: [PATCH 40/51] rm cuvs conda checks in ci

---
 .github/workflows/build-pull-request.yml | 30 ------------------------
 1 file changed, 30 deletions(-)

diff --git a/.github/workflows/build-pull-request.yml b/.github/workflows/build-pull-request.yml
index f94077513d..d94abba081 100644
--- a/.github/workflows/build-pull-request.yml
+++ b/.github/workflows/build-pull-request.yml
@@ -132,36 +132,6 @@ jobs:
           fetch-tags: true
       - name: Build and Package (conda)
         uses: ./.github/actions/build_conda
-  linux-x86_64-GPU-CUVS-CUDA11-8-0-conda:
-    name: Linux x86_64 GPU w/ cuVS conda (CUDA 11.8.0)
-    runs-on: 4-core-ubuntu-gpu-t4
-    env:
-      CUDA_ARCHS: "70-real;72-real;75-real;80;86-real"
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-          fetch-tags: true
-      - uses: ./.github/actions/build_conda
-        with:
-          cuvs: "ON"
-          cuda: "11.8.0"
-  linux-x86_64-GPU-CUVS-CUDA12-4-0-conda:
-    name: Linux x86_64 GPU w/ cuVS conda (CUDA 12.4.0)
-    runs-on: 4-core-ubuntu-gpu-t4
-    env:
-      CUDA_ARCHS: "70-real;72-real;75-real;80;86-real"
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-          fetch-tags: true
-      - uses: ./.github/actions/build_conda
-        with:
-          cuvs: "ON"
-          cuda: "12.4.0"
   windows-x86_64-conda:
     name: Windows x86_64 (conda)
     needs: linux-x86_64-cmake

From bb11d0442e68bf2bfdcd8618a7d9aa58131f7d15 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Fri, 20 Dec 2024 12:01:28 -0800
Subject: [PATCH 41/51] nightly prefix

---
 INSTALL.md | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/INSTALL.md b/INSTALL.md
index b789fbb328..13f6818921 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -26,7 +26,7 @@ $ conda install -c pytorch -c nvidia -c rapidsai -c conda-forge faiss-gpu-raft=1
 For faiss-gpu, the nvidia channel is required for CUDA, which is not
 published in the main anaconda channel.
 
-For faiss-gpu-cuvs, the rapidsai, conda-forge and nvidia channels are required.
+For faiss-gpu-cuvs, the rapidsai, conda-forge and nvidia channels are required. The `cuda-version` package is used to identify the CUDA version of the nightly.
 
 Nightly pre-release packages can be installed as follows:
 
@@ -37,8 +37,11 @@ $ conda install -c pytorch/label/nightly faiss-cpu
 # GPU(+CPU) version
 $ conda install -c pytorch/label/nightly -c nvidia faiss-gpu=1.9.0
 
-# GPU(+CPU) version with NVIDIA cuVS
-conda install -c pytorch -c rapidsai -c conda-forge -c nvidia faiss-gpu-cuvs pytorch pytorch-cuda numpy
+# GPU(+CPU) version with NVIDIA cuVS (package built with CUDA 12.4)
+conda install -c pytorch/label/nightly -c rapidsai -c conda-forge -c nvidia faiss-gpu-cuvs 'cuda-version>=12.0,<=12.5'
+
+# GPU(+CPU) version with NVIDIA cuVS (package built with CUDA 11.8)
+conda install -c pytorch/label/nightly -c rapidsai -c conda-forge -c nvidia faiss-gpu-cuvs 'cuda-version>=11.4,<=11.8'
 
 # GPU(+CPU) version using AMD ROCm not yet available
 ```

From 2d5a02572c0dd48ed9b595d6d1cd2641f96cd7e2 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Fri, 20 Dec 2024 13:26:04 -0800
Subject: [PATCH 42/51] footnote

---
 INSTALL.md | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/INSTALL.md b/INSTALL.md
index 13f6818921..faeacac41d 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -6,7 +6,7 @@ pre-release nightly builds.
 
 - The CPU-only faiss-cpu conda package is currently available on Linux (x86-64 and aarch64), OSX (arm64 only), and Windows (x86-64)
 - faiss-gpu, containing both CPU and GPU indices, is available on Linux (x86-64 only) for CUDA 11.4 and 12.1
-- faiss-gpu-cuvs nightly package containing GPU indices provided by [NVIDIA cuVS](https://github.com/rapidsai/cuvs/) 24.12, is available on Linux (x86-64 only) for CUDA 11.8 and 12.4. 
+- faiss-gpu-raft [^1] package containing GPU indices provided by [NVIDIA RAFT](https://github.com/rapidsai/raft/) version 24.06, is available on Linux (x86-64 only) for CUDA 11.8 and 12.4. 
 
 To install the latest stable release:
 
@@ -23,10 +23,11 @@ $ conda install -c pytorch -c nvidia -c rapidsai -c conda-forge faiss-gpu-raft=1
 # GPU(+CPU) version using AMD ROCm not yet available
 ```
 
-For faiss-gpu, the nvidia channel is required for CUDA, which is not
-published in the main anaconda channel.
+For faiss-gpu, the nvidia channel is required for CUDA, which is not published in the main anaconda channel.
 
-For faiss-gpu-cuvs, the rapidsai, conda-forge and nvidia channels are required. The `cuda-version` package is used to identify the CUDA version of the nightly.
+For faiss-gpu-raft, the rapidsai, conda-forge and nvidia channels are required.
+
+[^1] The vector search and clustering algorithms in NVIDIA RAFT have been formally migrated to [NVIDIA cuVS](https://github.com/rapidsai/cuvs). This package is being renamed to `faiss-gpu-cuvs` in the next stable release, which will use these GPU implementations from the pre-compiled `libcuvs=24.12` binary.
 
 Nightly pre-release packages can be installed as follows:
 

From 0dcce1c2216e97ded535f57d4b643da628198dc6 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Fri, 20 Dec 2024 13:39:29 -0800
Subject: [PATCH 43/51] correct footnote

---
 INSTALL.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/INSTALL.md b/INSTALL.md
index faeacac41d..f15154ad30 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -27,8 +27,6 @@ For faiss-gpu, the nvidia channel is required for CUDA, which is not published i
 
 For faiss-gpu-raft, the rapidsai, conda-forge and nvidia channels are required.
 
-[^1] The vector search and clustering algorithms in NVIDIA RAFT have been formally migrated to [NVIDIA cuVS](https://github.com/rapidsai/cuvs). This package is being renamed to `faiss-gpu-cuvs` in the next stable release, which will use these GPU implementations from the pre-compiled `libcuvs=24.12` binary.
-
 Nightly pre-release packages can be installed as follows:
 
 ``` shell
@@ -116,7 +114,7 @@ The libcuvs dependency should be installed via conda:
 ```
 conda install -c rapidsai -c conda-forge -c nvidia libcuvs=24.12 'cuda-version>=12.0,<=12.5'
 ```
-1. With CUDA 11.4 - 11.8
+2. With CUDA 11.4 - 11.8
 ```
 conda install -c rapidsai -c conda-forge -c nvidia libcuvs=24.12 'cuda-version>=11.4,<=11.8'
 ```
@@ -315,3 +313,5 @@ and you can run
 $ python demos/demo_auto_tune.py
 ```
 to test the GPU code.
+
+[^1]: The vector search and clustering algorithms in NVIDIA RAFT have been formally migrated to [NVIDIA cuVS](https://github.com/rapidsai/cuvs). This package is being renamed to `faiss-gpu-cuvs` in the next stable release, which will use these GPU implementations from the pre-compiled `libcuvs=24.12` binary.

From 5f5af9df83fdc62eb9411d09156cf040eb7cd351 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Fri, 20 Dec 2024 13:40:32 -0800
Subject: [PATCH 44/51] FAISS_ENABLE_CUVS default is OFF

---
 INSTALL.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/INSTALL.md b/INSTALL.md
index f15154ad30..404649c109 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -136,7 +136,7 @@ Several options can be passed to CMake, among which:
   - `-DFAISS_ENABLE_PYTHON=OFF` in order to disable building python bindings
   (possible values are `ON` and `OFF`),
   - `-DFAISS_ENABLE_CUVS=ON` in order to use the NVIDIA cuVS implementations
-    of the IVF-Flat, IVF-PQ and [CAGRA](https://arxiv.org/pdf/2308.15136) GPU-accelerated indices (default is `ON`, possible, values are `ON` and `OFF`).
+    of the IVF-Flat, IVF-PQ and [CAGRA](https://arxiv.org/pdf/2308.15136) GPU-accelerated indices (default is `OFF`, possible values are `ON` and `OFF`).
     Note: `-DFAISS_ENABLE_GPU` must be set to `ON` when enabling this option.
   - `-DBUILD_TESTING=OFF` in order to disable building C++ tests,
   - `-DBUILD_SHARED_LIBS=ON` in order to build a shared library (possible values

From ae60e9ed9588c189204d253c1105bb37ed5bdfe0 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Fri, 20 Dec 2024 15:10:07 -0800
Subject: [PATCH 45/51] reference issue

---
 faiss/gpu/impl/CuvsIVFFlat.cu | 1 +
 faiss/gpu/impl/CuvsIVFPQ.cu   | 1 +
 2 files changed, 2 insertions(+)

diff --git a/faiss/gpu/impl/CuvsIVFFlat.cu b/faiss/gpu/impl/CuvsIVFFlat.cu
index ef6e1a3d85..2cccee8605 100644
--- a/faiss/gpu/impl/CuvsIVFFlat.cu
+++ b/faiss/gpu/impl/CuvsIVFFlat.cu
@@ -291,6 +291,7 @@ void CuvsIVFFlat::searchPreassigned(
         Tensor<idx_t, 2, true>& outIndices,
         bool storePairs) {
     // TODO: Fill this in!
+    // Reference issue: https://github.com/facebookresearch/faiss/issues/3243
     FAISS_THROW_MSG("searchPreassigned is not implemented for cuVS index");
 }
 
diff --git a/faiss/gpu/impl/CuvsIVFPQ.cu b/faiss/gpu/impl/CuvsIVFPQ.cu
index 7172dad9b4..1e2fef225d 100644
--- a/faiss/gpu/impl/CuvsIVFPQ.cu
+++ b/faiss/gpu/impl/CuvsIVFPQ.cu
@@ -229,6 +229,7 @@ void CuvsIVFPQ::searchPreassigned(
         Tensor<idx_t, 2, true>& outIndices,
         bool storePairs) {
     // TODO: Fill this in!
+    // Reference issue: https://github.com/facebookresearch/faiss/issues/3243
     FAISS_THROW_MSG("searchPreassigned is not implemented for cuVS index");
 }
 

From 8d622bcc85ab695221a84a3749cca7fd5f3a95c5 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Fri, 20 Dec 2024 20:17:22 -0800
Subject: [PATCH 46/51] update nightly install commands

---
 INSTALL.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/INSTALL.md b/INSTALL.md
index 404649c109..89d3f15280 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -37,10 +37,11 @@ $ conda install -c pytorch/label/nightly faiss-cpu
 $ conda install -c pytorch/label/nightly -c nvidia faiss-gpu=1.9.0
 
 # GPU(+CPU) version with NVIDIA cuVS (package built with CUDA 12.4)
-conda install -c pytorch/label/nightly -c rapidsai -c conda-forge -c nvidia faiss-gpu-cuvs 'cuda-version>=12.0,<=12.5'
+conda install -c pytorch -c rapidsai -c conda-forge -c nvidia pytorch/label/nightly::faiss-gpu-cuvs 'cuda-version>=12.0,<=12.5'
 
 # GPU(+CPU) version with NVIDIA cuVS (package built with CUDA 11.8)
-conda install -c pytorch/label/nightly -c rapidsai -c conda-forge -c nvidia faiss-gpu-cuvs 'cuda-version>=11.4,<=11.8'
+conda install -c pytorch -c rapidsai -c conda-forge -c nvidia pytorch/label/nightly::faiss-gpu-cuvs 
+'cuda-version>=11.4,<=11.8'
 
 # GPU(+CPU) version using AMD ROCm not yet available
 ```

From 9c6ece602a87094a42f0fbae9a94a5abee4482ec Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Fri, 20 Dec 2024 20:22:32 -0800
Subject: [PATCH 47/51] newline character

---
 INSTALL.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/INSTALL.md b/INSTALL.md
index 89d3f15280..d3908be97a 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -40,8 +40,7 @@ $ conda install -c pytorch/label/nightly -c nvidia faiss-gpu=1.9.0
 conda install -c pytorch -c rapidsai -c conda-forge -c nvidia pytorch/label/nightly::faiss-gpu-cuvs 'cuda-version>=12.0,<=12.5'
 
 # GPU(+CPU) version with NVIDIA cuVS (package built with CUDA 11.8)
-conda install -c pytorch -c rapidsai -c conda-forge -c nvidia pytorch/label/nightly::faiss-gpu-cuvs 
-'cuda-version>=11.4,<=11.8'
+conda install -c pytorch -c rapidsai -c conda-forge -c nvidia pytorch/label/nightly::faiss-gpu-cuvs 'cuda-version>=11.4,<=11.8'
 
 # GPU(+CPU) version using AMD ROCm not yet available
 ```

From 8093ed55625af1d78b5526389b7ede1f8c9f3f65 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Fri, 3 Jan 2025 03:57:03 -0800
Subject: [PATCH 48/51] benchmarking scripts

---
 INSTALL.md                       |   2 +
 benchs/bench_ivfflat_cuvs.py     | 175 +++++++++++---------------
 benchs/bench_ivfpq_cuvs.py       | 206 +++++++++++++++++--------------
 faiss/gpu/GpuIndexIVFFlat.cu     |   3 -
 faiss/gpu/test/test_gpu_index.py |   3 +-
 5 files changed, 188 insertions(+), 201 deletions(-)

diff --git a/INSTALL.md b/INSTALL.md
index d3908be97a..f353aabcbe 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -109,6 +109,8 @@ section of the wiki](https://github.com/facebookresearch/faiss/wiki/Troubleshoot
 
 ### Building with NVIDIA cuVS
 
+[cuVS](https://docs.rapids.ai/api/cuvs/nightly/) contains state-of-the-art implementations of several algorithms for running approximate nearest neighbor search and clustering on the GPU. It is built on top of the [RAPIDS RAFT](https://github.com/rapidsai/raft) library of high-performance machine learning primitives. Building FAISS with cuVS enabled allows a user to choose between the regular GPU implementations in FAISS and the cuVS implementations for specific algorithms.
+
 The libcuvs dependency should be installed via conda:
 1. With CUDA 12.0 - 12.5:
 ```
diff --git a/benchs/bench_ivfflat_cuvs.py b/benchs/bench_ivfflat_cuvs.py
index 3628ec7422..0e3f74207f 100644
--- a/benchs/bench_ivfflat_cuvs.py
+++ b/benchs/bench_ivfflat_cuvs.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 #
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -25,6 +25,28 @@
 import argparse
 import rmm
 
+try:
+    from faiss.contrib.datasets_fb import \
+        DatasetSIFT1M, DatasetDeep1B, DatasetBigANN
+except ImportError:
+    from faiss.contrib.datasets import \
+        DatasetSIFT1M, DatasetDeep1B, DatasetBigANN
+
+
+# ds = DatasetDeep1B(10**6)
+# ds = DatasetBigANN(nb_M=1)
+ds = DatasetSIFT1M()
+
+xq = ds.get_queries()
+xb = ds.get_database()
+gt = ds.get_groundtruth()
+
+xt = ds.get_train()
+
+nb, d = xb.shape
+nq, d = xq.shape
+nt, d = xt.shape
+
 ######################################################
 # Command-line parsing
 ######################################################
@@ -38,25 +60,23 @@ def aa(*args, **kwargs):
 
 group = parser.add_argument_group('benchmarking options')
 
-aa('--bm_train', default=False, action='store_true',
+aa('--bm_train', default=True,
    help='whether to benchmark train operation on GPU index')
-aa('--bm_add', default=False, action='store_true',
+aa('--bm_add', default=True,
    help='whether to benchmark add operation on GPU index')
 aa('--bm_search', default=True,
    help='whether to benchmark search operation on GPU index')
-aa('--cuvs_only', default=False, action='store_true',
-   help='whether to only produce cuVS enabled benchmarks')
 
 
 group = parser.add_argument_group('IVF options')
-aa('--n_centroids', default=256, type=int,
+aa('--nlist', default=1024, type=int,
     help="number of IVF centroids")
 
 
 group = parser.add_argument_group('searching')
 
-aa('--k', default=100, type=int, help='nb of nearest neighbors')
-aa('--nprobe', default=50, help='nb of IVF lists to probe')
+aa('--k', default=10, type=int, help='nb of nearest neighbors')
+aa('--nprobe', default=10, type=int, help='nb of IVF lists to probe')
 
 args = parser.parse_args()
 
@@ -70,42 +90,38 @@ def aa(*args, **kwargs):
 mr = rmm.mr.PoolMemoryResource(rmm.mr.CudaMemoryResource())
 rmm.mr.set_current_device_resource(mr)
 
-def bench_train_milliseconds(index, trainVecs, use_cuvs):
-    co = faiss.GpuMultipleClonerOptions()
-    co.use_cuvs = use_cuvs
-    index_gpu = faiss.index_cpu_to_gpu(res, 0, index, co)
+
+def bench_train_milliseconds(trainVecs, ncols, nlist, use_cuvs):
+    config = faiss.GpuIndexIVFFlatConfig()
+    config.use_cuvs = use_cuvs
+    index = faiss.GpuIndexIVFFlat(res, ncols, nlist, faiss.METRIC_L2, config)
     t0 = time.time()
-    index_gpu.train(trainVecs)
+    index.train(trainVecs)
     return 1000*(time.time() - t0)
 
 
+#warmup
+xw = rs.rand(nt, d)
+bench_train_milliseconds(xw, d, args.nlist, True)
+
+
 if args.bm_train:
     print("=" * 40)
     print("GPU Train Benchmarks")
     print("=" * 40)
-    trainset_sizes = [5000, 10000, 100000, 1000000, 5000000]
-    dataset_dims = [128, 256, 1024]
-    for n_rows in trainset_sizes:
-        for n_cols in dataset_dims:
-            index = faiss.index_factory(n_cols, "IVF{},Flat".format(args.n_centroids))
-            trainVecs = rs.rand(n_rows, n_cols).astype('float32')
-            cuvs_gpu_train_time = bench_train_milliseconds(
-                index, trainVecs, True)
-            if args.cuvs_only:
-                print("Method: IVFFlat, Operation: TRAIN, dim: %d, n_centroids %d, numTrain: %d, cuVS enabled GPU train time: %.3f milliseconds" % (
-                    n_cols, args.n_centroids, n_rows, cuvs_gpu_train_time))
-            else:
-                classical_gpu_train_time = bench_train_milliseconds(
-                    index, trainVecs, False)
-                print("Method: IVFFlat, Operation: TRAIN, dim: %d, n_centroids %d, numTrain: %d, classical GPU train time: %.3f milliseconds, cuVS enabled GPU train time: %.3f milliseconds" % (
-                    n_cols, args.n_centroids, n_rows, classical_gpu_train_time, cuvs_gpu_train_time))
-
-
-def bench_add_milliseconds(index, addVecs, use_cuvs):
-    co = faiss.GpuMultipleClonerOptions()
-    co.use_cuvs = use_cuvs
-    index_gpu = faiss.index_cpu_to_gpu(res, 0, index, co)
-    index_gpu.copyFrom(index)
+
+    cuvs_gpu_train_time = bench_train_milliseconds(xt, d, args.nlist, True)
+    classical_gpu_train_time = bench_train_milliseconds(xt, d, args.nlist, False)
+    print("Method: IVFFlat, Operation: TRAIN, dim: %d, nlist %d, numTrain: %d, classical GPU train time: %.3f milliseconds, cuVS enabled GPU train time: %.3f milliseconds" % (
+        d, args.nlist, nt, classical_gpu_train_time, cuvs_gpu_train_time))
+
+
+def bench_add_milliseconds(addVecs, q, use_cuvs):
+    # construct a GPU index using the same trained coarse quantizer
+    config = faiss.GpuIndexIVFFlatConfig()
+    config.use_cuvs = use_cuvs
+    index_gpu = faiss.GpuIndexIVFFlat(res, q, d, args.nlist, faiss.METRIC_L2, config)
+    assert(index_gpu.is_trained)
     t0 = time.time()
     index_gpu.add(addVecs)
     return 1000*(time.time() - t0)
@@ -115,33 +131,19 @@ def bench_add_milliseconds(index, addVecs, use_cuvs):
     print("=" * 40)
     print("GPU Add Benchmarks")
     print("=" * 40)
-    addset_sizes = [5000, 10000, 100000, 1000000]
-    dataset_dims = [128, 256, 1024]
-    n_train = 10000
-    trainVecs = rs.rand(n_train, n_cols).astype('float32')
-    index = faiss.index_factory(
-        n_cols, "IVF" + str(args.n_centroids) + ",Flat")
-    index.train(trainVecs)
-    for n_rows in addset_sizes:
-        for n_cols in dataset_dims:
-            addVecs = rs.rand(n_rows, n_cols).astype('float32')
-            cuvs_gpu_add_time = bench_add_milliseconds(index, addVecs, True)
-            if args.cuvs_only:
-                print("Method: IVFFlat, Operation: ADD, dim: %d, n_centroids %d, numAdd: %d, cuVS enabled GPU add time: %.3f milliseconds" % (
-                    n_train, n_rows, n_cols, args.n_centroids, cuvs_gpu_add_time))
-            else:
-                classical_gpu_add_time = bench_add_milliseconds(
-                    index, addVecs, False)
-                print("Method: IVFFlat, Operation: ADD, dim: %d, n_centroids %d, numAdd: %d, classical GPU add time: %.3f milliseconds, cuVS enabled GPU add time: %.3f milliseconds" % (
-                    n_train, n_rows, n_cols, args.n_centroids, classical_gpu_add_time, cuvs_gpu_add_time))
-
-
-def bench_search_milliseconds(index, addVecs, queryVecs, nprobe, k, use_cuvs):
-    co = faiss.GpuMultipleClonerOptions()
+    quantizer = faiss.IndexFlatL2(d)
+    idx_cpu = faiss.IndexIVFFlat(quantizer, d, args.nlist)
+    idx_cpu.train(xt)
+    cuvs_gpu_add_time = bench_add_milliseconds(xb, quantizer, True)
+    classical_gpu_add_time = bench_add_milliseconds(xb, quantizer, False)
+    print("Method: IVFFlat, Operation: ADD, dim: %d, nlist %d, numAdd: %d, classical GPU add time: %.3f milliseconds, cuVS enabled GPU add time: %.3f milliseconds" % (
+        d, args.nlist, nb, classical_gpu_add_time, cuvs_gpu_add_time))
+
+
+def bench_search_milliseconds(index, queryVecs, nprobe, k, use_cuvs):
+    co = faiss.GpuClonerOptions()
     co.use_cuvs = use_cuvs
     index_gpu = faiss.index_cpu_to_gpu(res, 0, index, co)
-    index_gpu.copyFrom(index)
-    index_gpu.add(addVecs)
     index_gpu.nprobe = nprobe
     t0 = time.time()
     index_gpu.search(queryVecs, k)
@@ -152,43 +154,14 @@ def bench_search_milliseconds(index, addVecs, queryVecs, nprobe, k, use_cuvs):
     print("=" * 40)
     print("GPU Search Benchmarks")
     print("=" * 40)
-    queryset_sizes = [5000, 10000, 100000, 500000]
-    n_train = 10000
-    n_add = 100000
-    search_bm_dims = [8, 16, 32]
-    for n_cols in search_bm_dims:
-        index = faiss.index_factory(n_cols, "IVF{},Flat".format(args.n_centroids))
-        trainVecs = rs.rand(n_train, n_cols).astype('float32')
-        index.train(trainVecs)
-        addVecs = rs.rand(n_add, n_cols).astype('float32')
-        for n_rows in queryset_sizes:
-            queryVecs = rs.rand(n_rows, n_cols).astype('float32')
-            cuvs_gpu_search_time = bench_search_milliseconds(
-                index, addVecs, queryVecs, args.nprobe, args.k, True)
-            if args.cuvs_only:
-                print("Method: IVFFlat, Operation: SEARCH, dim: %d, n_centroids: %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, cuVS enabled GPU search time: %.3f milliseconds" % (
-                    n_cols, args.n_centroids, n_add, n_rows, args.nprobe, args.k, cuvs_gpu_search_time))
-            else:
-                classical_gpu_search_time = bench_search_milliseconds(
-                    index, addVecs, queryVecs, args.nprobe, args.k, False)
-                print("Method: IVFFlat, Operation: SEARCH, dim: %d, n_centroids: %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, classical GPU search time: %.3f milliseconds, cuVS enabled GPU search time: %.3f milliseconds" % (
-                    n_cols, args.n_centroids, n_add, n_rows, args.nprobe, args.k, classical_gpu_search_time, cuvs_gpu_search_time))
-
-    print("=" * 40)
-    print("Large cuVS Enabled Benchmarks")
-    print("=" * 40)
-    # Avoid classical GPU Benchmarks for large datasets because of OOM for more than 500000 queries and/or large dims as well as for large k
-    queryset_sizes = [100000, 500000, 1000000]
-    large_search_bm_dims = [128, 256, 1024]
-    for n_cols in large_search_bm_dims:
-        trainVecs = rs.rand(n_train, n_cols).astype('float32')
-        index = faiss.index_factory(
-            n_cols, "IVF" + str(args.n_centroids) + ",Flat")
-        index.train(trainVecs)
-        addVecs = rs.rand(n_add, n_cols).astype('float32')
-        for n_rows in queryset_sizes:
-            queryVecs = rs.rand(n_rows, n_cols).astype('float32')
-            cuvs_gpu_search_time = bench_search_milliseconds(
-                index, addVecs, queryVecs, args.nprobe, args.k, True)
-            print("Method: IVFFlat, Operation: SEARCH, numTrain: %d, dim: %d, n_centroids: %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, cuVS enabled GPU search time: %.3f milliseconds" % (
-                n_cols, args.n_centroids, n_add, n_rows, args.nprobe, args.k, cuvs_gpu_search_time))
+    idx_cpu = faiss.IndexIVFFlat(
+            faiss.IndexFlatL2(d), d, args.nlist)
+    idx_cpu.train(xt)
+    idx_cpu.add(xb)
+
+    cuvs_gpu_search_time = bench_search_milliseconds(
+        idx_cpu, xq, args.nprobe, args.k, True)
+    classical_gpu_search_time = bench_search_milliseconds(
+        idx_cpu, xq, args.nprobe, args.k, False)
+    print("Method: IVFFlat, Operation: SEARCH, dim: %d, nlist: %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, classical GPU search time: %.3f milliseconds, cuVS enabled GPU search time: %.3f milliseconds" % (
+        d, args.nlist, nb, nq, args.nprobe, args.k, classical_gpu_search_time, cuvs_gpu_search_time))
diff --git a/benchs/bench_ivfpq_cuvs.py b/benchs/bench_ivfpq_cuvs.py
index 7668afffea..c542fd3c42 100644
--- a/benchs/bench_ivfpq_cuvs.py
+++ b/benchs/bench_ivfpq_cuvs.py
@@ -1,9 +1,10 @@
-# Copyright (c) Facebook, Inc. and its affiliates.
+# @lint-ignore-every LICENSELINT
+# Copyright (c) Meta Platforms, Inc. and its affiliates.
 #
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 #
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -24,39 +25,68 @@
 import argparse
 import rmm
 
+try:
+    from faiss.contrib.datasets_fb import \
+        DatasetSIFT1M, DatasetDeep1B, DatasetBigANN
+except ImportError:
+    from faiss.contrib.datasets import \
+        DatasetSIFT1M, DatasetDeep1B, DatasetBigANN
+
+
+# ds = DatasetDeep1B(10**6)
+# ds = DatasetBigANN(nb_M=1)
+ds = DatasetSIFT1M()
+
+xq = ds.get_queries()
+xb = ds.get_database()
+gt = ds.get_groundtruth()
+
+xt = ds.get_train()
+
+nb, d = xb.shape
+nq, d = xq.shape
+nt, d = xt.shape
+
+M = d // 2  # floor division keeps M an int rather than a float
+
 ######################################################
 # Command-line parsing
 ######################################################
 
 parser = argparse.ArgumentParser()
 
-from datasets import load_sift1M, evaluate
-
-
-print("load data")
-xb, xq, xt, gt = load_sift1M()
 
 def aa(*args, **kwargs):
     group.add_argument(*args, **kwargs)
 
 
 group = parser.add_argument_group('benchmarking options')
-aa('--cuvs_only', default=False, action='store_true',
-   help='whether to only produce cuVS enabled benchmarks')
+
+aa('--bm_train', default=True,
+   help='whether to benchmark train operation on GPU index')
+aa('--bm_add', default=True,
+   help='whether to benchmark add operation on GPU index')
+aa('--bm_search', default=True,
+   help='whether to benchmark search operation on GPU index')
+
 
 group = parser.add_argument_group('IVF options')
-aa('--bits_per_code', default=8, type=int, help='bits per code. Note that < 8 is only supported when cuVS is enabled')
-aa('--pq_len', default=2, type=int, help='number of vector elements represented by one PQ code')
-aa('--use_precomputed', default=True, type=bool, help='use precomputed codes (not with cuVS enabled)')
+aa('--nlist', default=1024, type=np.int64,
+    help="number of IVF centroids")
+aa('--bits_per_code', default=8, type=np.int64, help='bits per code. Note that < 8 is only supported when cuVS is enabled')
+
 
 group = parser.add_argument_group('searching')
+
 aa('--k', default=10, type=int, help='nb of nearest neighbors')
-aa('--nprobe', default=50, type=int, help='nb of IVF lists to probe')
+aa('--nprobe', default=10, type=int, help='nb of IVF lists to probe')
 
 args = parser.parse_args()
 
 print("args:", args)
 
+gt = gt[:, :args.k]
+
 rs = np.random.RandomState(123)
 
 res = faiss.StandardGpuResources()
@@ -65,104 +95,90 @@ def aa(*args, **kwargs):
 mr = rmm.mr.PoolMemoryResource(rmm.mr.CudaMemoryResource())
 rmm.mr.set_current_device_resource(mr)
 
-# A heuristic to select a suitable number of lists
-def compute_nlist(numVecs):
-    nlist = np.sqrt(numVecs)
-    if (numVecs / nlist < 1000):
-        nlist = numVecs / 1000
-    return int(nlist)
 
+def eval_recall(neighbors, t):  # t: total search time in milliseconds for all nq queries
+    speed = t / nq  # milliseconds per query (t is already in ms, so no extra * 1000)
+    qps = 1000 / speed  # queries per second
 
-def bench_train_milliseconds(index, trainVecs, use_cuvs):
-    co = faiss.GpuMultipleClonerOptions()
-    # use float 16 lookup tables to save space
-    co.useFloat16LookupTables = True
-    co.use_cuvs = use_cuvs
-    index_gpu = faiss.index_cpu_to_gpu(res, 0, index, co)
+    corrects = (gt == neighbors).sum()  # position-wise matches against the ground truth
+    recall = corrects / (nq * args.k)
+
+    return recall, qps
+
+
+def bench_train_milliseconds(trainVecs, use_cuvs):  # returns the train() wall time in milliseconds
+    config = faiss.GpuIndexIVFPQConfig()
+    config.use_cuvs = use_cuvs  # choose the cuVS or the classical GPU implementation
+    index = faiss.GpuIndexIVFPQ(res, d, int(args.nlist), 32, int(args.bits_per_code), faiss.METRIC_L2, config)
     t0 = time.time()
-    index_gpu.train(trainVecs)
+    index.train(trainVecs)
     return 1000*(time.time() - t0)
 
-n_rows, n_cols = xb.shape
-n_train, _ = xt.shape
-M = n_cols // args.pq_len
-nlist = compute_nlist(n_rows)
-index = faiss.index_factory(n_cols, "IVF{},PQ{}x{}np".format(nlist, M, args.bits_per_code))
-
-print("=" * 40)
-print("GPU Train Benchmarks")
-print("=" * 40)
-cuvs_gpu_train_time = bench_train_milliseconds(index, xt, True)
-if args.cuvs_only:
-    print("Method: IVFPQ, Operation: TRAIN, dim: %d, n_centroids %d, numSubQuantizers %d, bitsPerCode %d, numTrain: %d, cuVS enabled GPU train time: %.3f milliseconds" % (
-        n_cols, nlist, M, args.bits_per_code, n_train, cuvs_gpu_train_time))
-else:
-    classical_gpu_train_time = bench_train_milliseconds(
-        index, xt, False)
-    print("Method: IVFPQ, Operation: TRAIN, dim: %d, n_centroids %d, numSubQuantizers %d, bitsPerCode %d, numTrain: %d, classical GPU train time: %.3f milliseconds, cuVS enabled GPU train time: %.3f milliseconds" % (
-        n_cols, nlist, M, args.bits_per_code, n_train, classical_gpu_train_time, cuvs_gpu_train_time))
-
-
-def bench_add_milliseconds(index, addVecs, use_cuvs):
-    co = faiss.GpuMultipleClonerOptions()
-    # use float 16 lookup tables to save space
-    co.useFloat16LookupTables = True
-    co.use_cuvs = use_cuvs
-    index_gpu = faiss.index_cpu_to_gpu(res, 0, index, co)
-    index_gpu.copyFrom(index)
+
+# warmup: train once on random data before the timed benchmarks
+xw = rs.rand(nt, d)
+bench_train_milliseconds(xw, True)
+
+
+if args.bm_train:
+    print("=" * 40)
+    print("GPU Train Benchmarks")
+    print("=" * 40)
+
+    cuvs_gpu_train_time = bench_train_milliseconds(xt, True)
+    classical_gpu_train_time = bench_train_milliseconds(xt, False)
+    print("TRAIN, dim: %d, nlist %d, numTrain: %d, classical GPU train time: %.3f milliseconds, cuVS enabled GPU train time: %.3f milliseconds" % (
+        d, args.nlist, nt, classical_gpu_train_time, cuvs_gpu_train_time))
+
+
+def bench_add_milliseconds(addVecs, index_cpu, use_cuvs):
+    # clone the trained CPU index to the GPU (cuVS or classical per use_cuvs); only the add() below is timed
+    config = faiss.GpuClonerOptions()
+    config.use_cuvs = use_cuvs
+    index_gpu = faiss.index_cpu_to_gpu(res, 0, index_cpu, config)
+    assert(index_gpu.is_trained)
     t0 = time.time()
     index_gpu.add(addVecs)
     return 1000*(time.time() - t0)
 
-print("=" * 40)
-print("GPU Add Benchmarks")
-print("=" * 40)
-index.train(xt)
-cuvs_gpu_add_time = bench_add_milliseconds(index, xb, True)
-if args.cuvs_only:
-    print("Method: IVFPQ, Operation: ADD, dim: %d, n_centroids %d numSubQuantizers %d, bitsPerCode %d, numAdd %d, cuVS enabled GPU add time: %.3f milliseconds" % (
-        n_cols, nlist, M, args.bits_per_code, n_rows, cuvs_gpu_add_time))
-else:
-    classical_gpu_add_time = bench_add_milliseconds(
-        index, xb, False)
-    print("Method: IVFFPQ, Operation: ADD, dim: %d, n_centroids %d, numSubQuantizers %d, bitsPerCode %d, numAdd %d, classical GPU add time: %.3f milliseconds, cuVS enabled GPU add time: %.3f milliseconds" % (
-        n_cols, nlist, M, args.bits_per_code, n_rows, classical_gpu_add_time, cuvs_gpu_add_time))
-
-
-def bench_search_milliseconds(index, addVecs, queryVecs, nprobe, k, use_cuvs):
-    co = faiss.GpuMultipleClonerOptions()
+
+if args.bm_add:
+    print("=" * 40)
+    print("GPU Add Benchmarks")
+    print("=" * 40)
+    quantizer = faiss.IndexFlatL2(d)
+    index_cpu = faiss.IndexIVFPQ(quantizer, d, int(args.nlist), 32, int(args.bits_per_code), faiss.METRIC_L2)
+    index_cpu.train(xt)  # train on CPU so that only add() is measured on the GPU clones
+    cuvs_gpu_add_time = bench_add_milliseconds(xb, index_cpu, True)
+    classical_gpu_add_time = bench_add_milliseconds(xb, index_cpu, False)
+    print("ADD, dim: %d, nlist %d, numAdd: %d, classical GPU add time: %.3f milliseconds, cuVS enabled GPU add time: %.3f milliseconds" % (
+        d, args.nlist, nb, classical_gpu_add_time, cuvs_gpu_add_time))
+
+
+def bench_search_milliseconds(index, queryVecs, nprobe, k, use_cuvs):
+    co = faiss.GpuClonerOptions()  # cloner options carry the cuVS on/off switch
     co.use_cuvs = use_cuvs
-    co.useFloat16LookupTables = True
     index_gpu = faiss.index_cpu_to_gpu(res, 0, index, co)
-    index_gpu.copyFrom(index)
-    index_gpu.add(addVecs)
     index_gpu.nprobe = nprobe
     t0 = time.time()
-    index_gpu.search(queryVecs, k)
-    return 1000*(time.time() - t0)
+    _, I = index_gpu.search(queryVecs, k)  # distances are discarded, only neighbor ids are kept
+    return I, 1000*(time.time() - t0)  # (neighbor ids, search time in milliseconds)
 
 
+# Search benchmarks: both GPU indexes are cloned from the same CPU index, so they have identical IVF centroids and lists.
 if args.bm_search:
     print("=" * 40)
     print("GPU Search Benchmarks")
     print("=" * 40)
-    queryset_sizes = [1, 10, 100, 1000, 10000]
-    n_train, n_cols = xt.shape
-    n_add, _ = xb.shape
-    print(xq.shape)
-    M = n_cols // args.pq_len
-    nlist = compute_nlist(n_add)
-    index = faiss.index_factory(n_cols, "IVF{},PQ{}x{}np".format(nlist, M, args.bits_per_code))
-    index.train(xt)
-    for n_rows in queryset_sizes:
-        queryVecs = xq[np.random.choice(xq.shape[0], n_rows, replace=False)]
-        cuvs_gpu_search_time = bench_search_milliseconds(
-            index, xb, queryVecs, args.nprobe, args.k, True)
-        if args.cuvs_only:
-            print("Method: IVFPQ, Operation: SEARCH, dim: %d, n_centroids: %d, numSubQuantizers %d, bitsPerCode %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, cuVS enabled GPU search time: %.3f milliseconds" % (
-                n_cols, nlist, M, args.bits_per_code, n_add, n_rows, args.nprobe, args.k, cuvs_gpu_search_time))
-        else:
-            classical_gpu_search_time = bench_search_milliseconds(
-                index, xb, queryVecs, args.nprobe, args.k, False)
-            print("Method: IVFPQ, Operation: SEARCH, dim: %d, n_centroids: %d, numSubQuantizers %d, bitsPerCode %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, classical GPU search time: %.3f milliseconds, cuVS enabled GPU search time: %.3f milliseconds" % (
-                n_cols, nlist, M, args.bits_per_code, n_add, n_rows, args.nprobe, args.k, classical_gpu_search_time, cuvs_gpu_search_time))
\ No newline at end of file
+    index_cpu = faiss.index_factory(d, "IVF%d,PQ32x%dnp" % (args.nlist, args.bits_per_code))
+    index_cpu.train(xt)  # the factory-built index owns its quantizer, so this works even if bm_add was skipped
+    index_cpu.add(xb)  # populate once on CPU; both GPU clones then hold the same lists
+
+    cuvs_indices, cuvs_gpu_search_time = bench_search_milliseconds(
+        index_cpu, xq, args.nprobe, args.k, True)
+    classical_gpu_indices, classical_gpu_search_time = bench_search_milliseconds(
+        index_cpu, xq, args.nprobe, args.k, False)
+    cuvs_recall, cuvs_qps = eval_recall(cuvs_indices, cuvs_gpu_search_time)
+    classical_recall, classical_qps = eval_recall(classical_gpu_indices, classical_gpu_search_time)
+    print("SEARCH, dim: %d, nlist: %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, classical GPU qps: %.3f, cuVS enabled GPU qps: %.3f"  % (
+        d, args.nlist, nb, nq, args.nprobe, args.k, classical_qps, cuvs_qps))
diff --git a/faiss/gpu/GpuIndexIVFFlat.cu b/faiss/gpu/GpuIndexIVFFlat.cu
index ceeb2dda76..eb5dacc1cd 100644
--- a/faiss/gpu/GpuIndexIVFFlat.cu
+++ b/faiss/gpu/GpuIndexIVFFlat.cu
@@ -72,9 +72,6 @@ GpuIndexIVFFlat::GpuIndexIVFFlat(
                   config),
           ivfFlatConfig_(config),
           reserveMemoryVecs_(0) {
-    FAISS_THROW_IF_NOT_MSG(
-            !should_use_cuvs(config),
-            "GpuIndexIVFFlat: cuVS does not support separate coarseQuantizer");
     // We could have been passed an already trained coarse quantizer. There is
     // no other quantizer that we need to train, so this is sufficient
     if (this->is_trained) {
diff --git a/faiss/gpu/test/test_gpu_index.py b/faiss/gpu/test/test_gpu_index.py
index d3892e190d..7170667666 100755
--- a/faiss/gpu/test/test_gpu_index.py
+++ b/faiss/gpu/test/test_gpu_index.py
@@ -141,7 +141,6 @@ def test_ivfflat_cpu_coarse(self):
         # construct a GPU index using the same trained coarse quantizer
         # from the CPU index
         config = faiss.GpuIndexIVFFlatConfig()
-        config.use_cuvs = False
         idx_gpu = faiss.GpuIndexIVFFlat(res, q, d, nlist, faiss.METRIC_L2, config)
         assert(idx_gpu.is_trained)
         idx_gpu.add(xb)
@@ -156,6 +155,7 @@ def test_ivfflat_cpu_coarse(self):
         self.assertGreaterEqual((i_g == i_c).sum(), i_g.size * 0.9)
         self.assertTrue(np.allclose(d_g, d_c, rtol=5e-5, atol=5e-5))
 
+
     def test_ivfsq_pu_coarse(self):
         res = faiss.StandardGpuResources()
         d = 128
@@ -233,7 +233,6 @@ def test_ivfpq_cpu_coarse(self):
         # construct a GPU index using the same trained coarse quantizer
         # from the CPU index
         config = faiss.GpuIndexIVFPQConfig()
-        config.use_cuvs = False
         idx_gpu = faiss.GpuIndexIVFPQ(
             res, idx_coarse_cpu, d, nlist_lvl_2, 4, 8, faiss.METRIC_L2, config)
         assert(not idx_gpu.is_trained)

From f2dd5377076fa910758dad66b0c2459b72e79786 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Fri, 3 Jan 2025 04:32:26 -0800
Subject: [PATCH 49/51] rm hardcoding

---
 benchs/bench_ivfpq_cuvs.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/benchs/bench_ivfpq_cuvs.py b/benchs/bench_ivfpq_cuvs.py
index c542fd3c42..924f24038a 100644
--- a/benchs/bench_ivfpq_cuvs.py
+++ b/benchs/bench_ivfpq_cuvs.py
@@ -24,6 +24,7 @@
 import time
 import argparse
 import rmm
+import ctypes
 
 try:
     from faiss.contrib.datasets_fb import \
@@ -86,6 +87,8 @@ def aa(*args, **kwargs):
 print("args:", args)
 
 gt = gt[:, :args.k]
+nlist = args.nlist
+bits_per_code = args.bits_per_code
 
 rs = np.random.RandomState(123)
 
@@ -128,7 +131,7 @@ def bench_train_milliseconds(trainVecs, use_cuvs):
     cuvs_gpu_train_time = bench_train_milliseconds(xt, True)
     classical_gpu_train_time = bench_train_milliseconds(xt, False)
     print("TRAIN, dim: %d, nlist %d, numTrain: %d, classical GPU train time: %.3f milliseconds, cuVS enabled GPU train time: %.3f milliseconds" % (
-        d, args.nlist, nt, classical_gpu_train_time, cuvs_gpu_train_time))
+        d, nlist, nt, classical_gpu_train_time, cuvs_gpu_train_time))
 
 
 def bench_add_milliseconds(addVecs, index_cpu, use_cuvs):
@@ -152,7 +155,7 @@ def bench_add_milliseconds(addVecs, index_cpu, use_cuvs):
     cuvs_gpu_add_time = bench_add_milliseconds(xb, index_cpu, True)
     classical_gpu_add_time = bench_add_milliseconds(xb, index_cpu, False)
     print("ADD, dim: %d, nlist %d, numAdd: %d, classical GPU add time: %.3f milliseconds, cuVS enabled GPU add time: %.3f milliseconds" % (
-        d, args.nlist, nb, classical_gpu_add_time, cuvs_gpu_add_time))
+        d, nlist, nb, classical_gpu_add_time, cuvs_gpu_add_time))
 
 
 def bench_search_milliseconds(index, queryVecs, nprobe, k, use_cuvs):
@@ -181,4 +184,4 @@ def bench_search_milliseconds(index, queryVecs, nprobe, k, use_cuvs):
     cuvs_recall, cuvs_qps = eval_recall(cuvs_indices, cuvs_gpu_search_time)
     classical_recall, classical_qps = eval_recall(classical_gpu_indices, classical_gpu_search_time)
     print("SEARCH, dim: %d, nlist: %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, classical GPU qps: %.3f, cuVS enabled GPU qps: %.3f"  % (
-        d, args.nlist, nb, nq, args.nprobe, args.k, classical_qps, cuvs_qps))
+        d, nlist, nb, nq, args.nprobe, args.k, classical_qps, cuvs_qps))

From 4b54efbe5aba7f3f323056b56b22f0d54f4e2a3c Mon Sep 17 00:00:00 2001
From: Tarang Jain <tarangj@tarangj-mlt.client.nvidia.com>
Date: Thu, 23 Jan 2025 14:43:08 +0530
Subject: [PATCH 50/51] hidden visibility for remaining src files

---
 faiss/gpu/CMakeLists.txt | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/faiss/gpu/CMakeLists.txt b/faiss/gpu/CMakeLists.txt
index 16574aab61..04d28907d1 100644
--- a/faiss/gpu/CMakeLists.txt
+++ b/faiss/gpu/CMakeLists.txt
@@ -273,7 +273,7 @@ if(FAISS_ENABLE_CUVS)
   target_compile_definitions(faiss_avx512_spr PUBLIC USE_NVIDIA_CUVS=1)
 
   # Mark all functions as hidden so that we don't generate
-  # global 'public' functions that also exist in libraft.so
+  # global 'public' functions that also exist in libcuvs.so
   #
   # This ensures that faiss functions will call the local version
   # inside libfaiss.so . This is needed to ensure that things
@@ -285,8 +285,13 @@ if(FAISS_ENABLE_CUVS)
   # respective classes/types in the headers are explicitly marked
   # as 'public' so they can be used by consumers
   set_source_files_properties(
+    GpuIndexCagra.cu
     GpuDistance.cu
+    GpuIndexIVFFlat.cu
+    GpuIndexIVFPQ.cu
+    GpuIndexFlat.cu
     StandardGpuResources.cpp
+    impl/CuvsCagra.cu
     impl/CuvsFlatIndex.cu
     impl/CuvsIVFFlat.cu
     impl/CuvsIVFPQ.cu

From edf196fd2541e40f831a257b667afcfa57e492d6 Mon Sep 17 00:00:00 2001
From: Tarang Jain <jaintarang2015@gmail.com>
Date: Tue, 28 Jan 2025 09:23:52 -0800
Subject: [PATCH 51/51] disable cpu coarse for cuvs-ivfpq

---
 faiss/gpu/test/test_gpu_index.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/faiss/gpu/test/test_gpu_index.py b/faiss/gpu/test/test_gpu_index.py
index 7170667666..287f27d958 100755
--- a/faiss/gpu/test/test_gpu_index.py
+++ b/faiss/gpu/test/test_gpu_index.py
@@ -233,6 +233,7 @@ def test_ivfpq_cpu_coarse(self):
         # construct a GPU index using the same trained coarse quantizer
         # from the CPU index
         config = faiss.GpuIndexIVFPQConfig()
+        config.use_cuvs = False
         idx_gpu = faiss.GpuIndexIVFPQ(
             res, idx_coarse_cpu, d, nlist_lvl_2, 4, 8, faiss.METRIC_L2, config)
         assert(not idx_gpu.is_trained)