@@ -197,6 +197,10 @@ function(generate_ivf_interleaved_code)
197
197
"64|2048|8"
198
198
)
199
199
200
+ if (USE_ROCM)
201
+ list (TRANSFORM FAISS_GPU_SRC REPLACE cu$ hip)
202
+ endif ()
203
+
200
204
# Traverse through the Cartesian product of X and Y
201
205
foreach (sub_codec ${SUB_CODEC_TYPE} )
202
206
foreach (metric_type ${SUB_METRIC_TYPE} )
@@ -210,10 +214,10 @@ function(generate_ivf_interleaved_code)
210
214
set (filename "template_${sub_codec} _${metric_type} _${sub_threads} _${sub_num_warp_q} _${sub_num_thread_q} " )
211
215
# Remove illegal characters from filename
212
216
string (REGEX REPLACE "[^A-Za-z0-9_]" "" filename ${filename} )
213
- set (output_file "${CMAKE_CURRENT_BINARY_DIR} /${filename} .cu " )
217
+ set (output_file "${CMAKE_CURRENT_BINARY_DIR} /${filename} .${GPU_EXT_PREFIX} " )
214
218
215
219
# Read the template file
216
- file (READ "${CMAKE_CURRENT_SOURCE_DIR} /impl/scan/IVFInterleavedScanKernelTemplate.cu " template_content)
220
+ file (READ "${CMAKE_CURRENT_SOURCE_DIR} /impl/scan/IVFInterleavedScanKernelTemplate.${GPU_EXT_PREFIX} " template_content)
217
221
218
222
# Replace the placeholders
219
223
string (REPLACE "SUB_CODEC_TYPE" "${sub_codec} " template_content "${template_content} " )
@@ -290,6 +294,10 @@ if(FAISS_ENABLE_RAFT)
290
294
target_compile_definitions (faiss_gpu PUBLIC USE_NVIDIA_RAFT=1)
291
295
endif ()
292
296
297
+ if (USE_ROCM)
298
+ list (TRANSFORM FAISS_GPU_SRC REPLACE cu$ hip)
299
+ endif ()
300
+
293
301
# Export FAISS_GPU_HEADERS variable to parent scope.
294
302
set (FAISS_GPU_HEADERS ${FAISS_GPU_HEADERS} PARENT_SCOPE)
295
303
@@ -305,21 +313,26 @@ foreach(header ${FAISS_GPU_HEADERS})
305
313
)
306
314
endforeach ()
307
315
308
- # Prepares a host linker script and enables host linker to support
309
- # very large device object files.
310
- # This is what CUDA 11.5+ `nvcc -hls=gen-lcs -aug-hls` would generate
311
- file (WRITE "${CMAKE_CURRENT_BINARY_DIR} /fatbin.ld"
312
- [=[
313
- SECTIONS
314
- {
315
- .nvFatBinSegment : { *(.nvFatBinSegment) }
316
- __nv_relfatbin : { *(__nv_relfatbin) }
317
- .nv_fatbin : { *(.nv_fatbin) }
318
- }
319
- ]=]
320
- )
321
- target_link_options (faiss_gpu PRIVATE "${CMAKE_CURRENT_BINARY_DIR} /fatbin.ld" )
316
+ if (USE_ROCM)
317
+ target_link_libraries (faiss_gpu PRIVATE $<$<BOOL :${USE_ROCM} >:hip::host> $<$<BOOL :${USE_ROCM} >:roc::hipblas>)
318
+ target_compile_options (faiss_gpu PRIVATE )
319
+ else ()
320
+ # Prepares a host linker script and enables host linker to support
321
+ # very large device object files.
322
+ # This is what CUDA 11.5+ `nvcc -hls=gen-lcs -aug-hls` would generate
323
+ file (WRITE "${CMAKE_CURRENT_BINARY_DIR} /fatbin.ld"
324
+ [=[
325
+ SECTIONS
326
+ {
327
+ .nvFatBinSegment : { *(.nvFatBinSegment) }
328
+ __nv_relfatbin : { *(__nv_relfatbin) }
329
+ .nv_fatbin : { *(.nv_fatbin) }
330
+ }
331
+ ]=]
332
+ )
333
+ target_link_options (faiss_gpu PRIVATE "${CMAKE_CURRENT_BINARY_DIR} /fatbin.ld" )
322
334
323
- find_package (CUDAToolkit REQUIRED)
324
- target_link_libraries (faiss_gpu PRIVATE CUDA::cudart CUDA::cublas $<$<BOOL :${FAISS_ENABLE_RAFT} >:raft::raft> $<$<BOOL :${FAISS_ENABLE_RAFT} >:raft::compiled> $<$<BOOL :${FAISS_ENABLE_RAFT} >:nvidia::cutlass::cutlass> $<$<BOOL :${FAISS_ENABLE_RAFT} >:OpenMP::OpenMP_CXX>)
325
- target_compile_options (faiss_gpu PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all --expt-extended-lambda --expt-relaxed-constexpr $<$<BOOL :${FAISS_ENABLE_RAFT} >:-Xcompiler=${OpenMP_CXX_FLAGS} >>)
335
+ find_package (CUDAToolkit REQUIRED)
336
+ target_link_libraries (faiss_gpu PRIVATE CUDA::cudart CUDA::cublas $<$<BOOL :${FAISS_ENABLE_RAFT} >:raft::raft> $<$<BOOL :${FAISS_ENABLE_RAFT} >:raft::compiled> $<$<BOOL :${FAISS_ENABLE_RAFT} >:nvidia::cutlass::cutlass> $<$<BOOL :${FAISS_ENABLE_RAFT} >:OpenMP::OpenMP_CXX>)
337
+ target_compile_options (faiss_gpu PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all --expt-extended-lambda --expt-relaxed-constexpr $<$<BOOL :${FAISS_ENABLE_RAFT} >:-Xcompiler=${OpenMP_CXX_FLAGS} >>)
338
+ endif ()
0 commit comments